diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 000000000..f8a6222c5 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,88 @@ + +cmake_minimum_required(VERSION 2.8) +project(jetson-inference) + + +# setup GIE +set(GIE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../GIE" CACHE FILEPATH "Path to GPU Inference Engine") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") # -std=gnu++11 + +set(BUILD_DEPS "YES" CACHE BOOL "If YES, will install dependencies into sandbox. Automatically reset to NO after dependencies are installed.") + + +# if this is the first time running cmake, perform pre-build dependency install script (or if the user manually triggers re-building the dependencies) +if( ${BUILD_DEPS} ) + message("Launching pre-build dependency installer script...") + + execute_process(COMMAND sh ../CMakePreBuild.sh + WORKING_DIRECTORY ${PROJECT_BINARY_DIR} + RESULT_VARIABLE PREBUILD_SCRIPT_RESULT) + + set(BUILD_DEPS "NO" CACHE BOOL "If YES, will install dependencies into sandbox. Automatically reset to NO after dependencies are installed." FORCE) + message("Finished installing dependencies") +endif() + + +# Qt4 is used to load images (installed by ubuntu-desktop) +find_package(Qt4 REQUIRED) +include(${QT_USE_FILE}) +add_definitions(${QT_DEFINITIONS}) + + +# setup CUDA +find_package(CUDA) + +set( + CUDA_NVCC_FLAGS + ${CUDA_NVCC_FLAGS}; + -std=c++11 -O3 -gencode arch=compute_53,code=sm_53 +) + + +# setup project output paths +set(PROJECT_OUTPUT_DIR ${PROJECT_BINARY_DIR}/${CMAKE_SYSTEM_PROCESSOR}) +set(PROJECT_INCLUDE_DIR ${PROJECT_OUTPUT_DIR}/include) + +file(MAKE_DIRECTORY ${PROJECT_INCLUDE_DIR}) +file(MAKE_DIRECTORY ${PROJECT_OUTPUT_DIR}/bin) + +message("-- system arch: ${CMAKE_SYSTEM_PROCESSOR}") +message("-- output path: ${PROJECT_OUTPUT_DIR}") + +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_OUTPUT_DIR}/bin) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_OUTPUT_DIR}/lib) +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_OUTPUT_DIR}/lib) + + +# build C/C++ interface +include_directories(${PROJECT_INCLUDE_DIR} ${GIE_PATH}/include) +include_directories(/usr/include/gstreamer-1.0 /usr/include/glib-2.0 /usr/include/libxml2 /usr/lib/aarch64-linux-gnu/glib-2.0/include/) +link_directories(${GIE_PATH}/lib) + +file(GLOB inferenceSources *.cpp *.cu camera/*.cpp cuda/*.cu display/*.cpp) +file(GLOB inferenceIncludes *.h camera/*.h cuda/*.h display/*.h) + +cuda_add_library(jetson-inference SHARED ${inferenceSources}) +target_link_libraries(jetson-inference nvcaffe_parser nvinfer Qt4::QtGui GL GLEW gstreamer-1.0 gstapp-1.0) # gstreamer-0.10 gstbase-0.10 gstapp-0.10 + +# transfer all headers to the include directory +foreach(include ${inferenceIncludes}) + message("-- Copying ${include}") + configure_file(${include} ${PROJECT_INCLUDE_DIR} COPYONLY) +endforeach() + +# copy network data +file(GLOB networkData ${PROJECT_SOURCE_DIR}/data/networks/* ${PROJECT_SOURCE_DIR}/data/images/*) + +foreach(include ${networkData}) + message("-- Copying ${include}") + configure_file(${include} ${CMAKE_RUNTIME_OUTPUT_DIRECTORY} COPYONLY) +endforeach() + +# build samples & tools +add_subdirectory(imagenet-console) +add_subdirectory(imagenet-camera) +add_subdirectory(camera/gst-camera) +add_subdirectory(camera/v4l2-console) +add_subdirectory(camera/v4l2-display) + diff --git a/CMakePreBuild.sh b/CMakePreBuild.sh new file mode 100644 index 000000000..1ac0995d6 --- /dev/null +++ b/CMakePreBuild.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash +# this script is automatically run from CMakeLists.txt + +BUILD_ROOT=$PWD +TORCH_PREFIX=$PWD/torch + +echo "[Pre-build] dependency installer script running..." +echo "[Pre-build] build root directory: $BUILD_ROOT" + + +sudo apt-get update +sudo apt-get install -y qt4-dev-tools libglew-dev glew-utils libgstreamer1.0-dev +# libgstreamer0.10-0-dev libgstreamer-plugins-base0.10-dev libxml2-dev +sudo apt-get update + + +# libgstreamer-plugins-base1.0-dev + +sudo rm /usr/lib/aarch64-linux-gnu/libGL.so +sudo ln -s /usr/lib/aarch64-linux-gnu/tegra/libGL.so /usr/lib/aarch64-linux-gnu/libGL.so + + +wget http://dl.caffe.berkeleyvision.org/bvlc_alexnet.caffemodel +mv bvlc_alexnet.caffemodel ../data/networks + +wget http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel +mv bvlc_googlenet.caffemodel ../data/networks diff --git a/README.md b/README.md index 7c988aff1..474ba6a65 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,216 @@ # jetson-inference -Guide to deploying deep-learning inference networks and end-to-end object recognition tutorial for NVIDIA Jetson TX1. +Welcome to NVIDIA's deep learning inference workshop and end-to-end object recognition library for Jetson TX1. + + +![Alt text](https://a70ad2d16996820e6285-3c315462976343d903d5b3a03b69072d.ssl.cf2.rackcdn.com/0e7182cddd632abe6832849776204911) + + +### Table of Contents + +* [Table of Contents](#table-of-contents) +* [Introduction](#introduction) + * [Training](#training) + * [DIGITS](#digits) + * [Inference](#inference) +* [Building nvcaffe](#building-nvcaffe) +* [Installing GPU Inference Engine](#installing-gpu-inference-engine) + +> **note**: this branch of the tutorial uses +> JetPack 2.2 / L4T R24.1 aarch64. + +### Introduction + +*Deep-learning* networks typically have two primary phases of development: **training** and **inference** + +#### Training +During the training phase, the network learns from a large dataset of labeled examples. The weights of the neural network become optimized to recognize the patterns contained within the training dataset. Deep neural networks have many layers of neurons connected togethers. Deeper networks take increasingly longer to train and evaluate, but are ultimately able to encode more intelligence within them. + +![Alt text](https://a70ad2d16996820e6285-3c315462976343d903d5b3a03b69072d.ssl.cf2.rackcdn.com/fd4ba9e7e68b76fc41c8312856c7d0ad) + +Throughout training, the network's inference performance is tested and refined using trial dataset. Like the training dataset, the trial dataset is labeled with ground-truth so the network's accuracy can be evaluated, but was not included in the training dataset. The network continues to train iteratively until it reaches a certain level of accuracy set by the user. + +Due to the size of the datasets and deep inference networks, training is typically very resource-intensive and can take weeks or months on traditional compute architectures. However, using GPUs vastly accellerates the process down to days or hours. + +##### DIGITS + +Using [DIGITS](https://developer.nvidia.com/digits), anyone can easily get started and interactively train their networks with GPU acceleration.
DIGITS is an open-source project contributed by NVIDIA, located here: https://github.com/NVIDIA/DIGITS. + +This tutorial will use DIGITS and Jetson TX1 together for training and deploying deep-learning networks,
refered to as the DIGITS workflow: + +![Alt text](https://a70ad2d16996820e6285-3c315462976343d903d5b3a03b69072d.ssl.cf2.rackcdn.com/90bde1f85a952157b914f75a9f8739c2) + + +#### Inference +Using it's trained weights, the network evaluates live data at runtime. Called inference, the network predicts and applies reasoning based off the examples it learned. Due to the depth of deep learning networks, inference requires significant compute resources to process in realtime on imagery and other sensor data. However, using NVIDIA's GPU Inference Engine which uses Jetson's integrated NVIDIA GPU, inference can be deployed onboard embedded platforms. Applications in robotics like picking, autonomous navigation, agriculture, and industrial inspection have many uses for deploying deep inference, including: + + - Image recognition + - Object detection + - Segmentation + - Image registration (homography estimation) + - Depth from raw stereo + - Signal analytics + - Others? + + +## Building nvcaffe + +A special branch of caffe is used on TX1 which includes support for FP16.
+The code is released in NVIDIA's caffe repo in the experimental/fp16 branch, located here: +> https://github.com/nvidia/caffe/tree/experimental/fp16 + +#### 1. Installing Dependencies + +``` bash +$ sudo apt-get install protobuf-compiler libprotobuf-dev cmake git libboost-thread1.55-dev libgflags-dev libgoogle-glog-dev libhdf5-dev libatlas-dev libatlas-base-dev libatlas3-base liblmdb-dev libleveldb-dev +``` + +The Snappy package needs a symbolic link created for Caffe to link correctly: + +``` bash +$ sudo ln -s /usr/lib/libsnappy.so.1 /usr/lib/libsnappy.so +$ sudo ldconfig +``` + +#### 2. Clone nvcaffe fp16 branch + +``` bash +$ git clone -b experimental/fp16 https://github.com/NVIDIA/caffe +``` + +This will checkout the repo to a local directory called `caffe` on your Jetson. + +#### 3. Setup build options + +``` bash +$ cd caffe +$ cp Makefile.config.example Makefile.config +``` + +###### Enable FP16: + +``` bash +$ sed -i 's/# NATIVE_FP16/NATIVE_FP16/g' Makefile.config +``` + +###### Enable cuDNN: + +``` bash +$ sed -i 's/# USE_CUDNN/USE_CUDNN/g' Makefile.config +``` + +###### Enable compute_53/sm_53: + +``` bash +$ sed -i 's/-gencode arch=compute_50,code=compute_50/-gencode arch=compute_53,code=sm_53 -gencode arch=compute_53,code=compute_53/g' Makefile.config +``` + +#### 4. Compiling nvcaffe + +``` bash +$ make all +$ make test +``` + +#### 5. Testing nvcaffe + +``` bash +$ make runtest +``` + +## Installing GPU Inference Engine + +NVIDIA's [GPU Inference Engine](https://developer.nvidia.com/gie) (GIE) is an optimized backend for evaluating deep inference networks in prototxt format. + +#### 1. Package contents + +First, unzip the archive: +``` +$ tar -zxvf gie.aarch64-cuda7.0-1.0-ea.tar.gz +``` + +The directory structure is as follows: +``` +|-GIE +| \bin where the samples are built to +| \data sample network model / prototxt's +| \doc API documentation and User Guide +| \include +| \lib +| \samples +``` + +#### 2. Remove packaged cuDNN + +If you flashed your Jetson TX1 with JetPack or already have cuDNN installed, remove the version of cuDNN that comes with GIE: + +``` +$ cd GIE/lib +$ rm libcudnn* +$ cd ../../ +``` + +#### 3. Build samples + +```` +$ cd GIE/samples/sampleMNIST +$ make TARGET=tx1 +Compiling: sampleMNIST.cpp +Linking: ../../bin/sample_mnist_debug +Compiling: sampleMNIST.cpp +Linking: ../../bin/sample_mnist +$ cd ../sampleGoogleNet +$ make TARGET=tx1 +Compiling: sampleGoogleNet.cpp +Linking: ../../bin/sample_googlenet_debug +Compiling: sampleGoogleNet.cpp +Linking: ../../bin/sample_googlenet +$ cd ../../../ +```` + +#### 4. Running samples + +```` +$ cd GIE/bin +$ ./sample_mnist +@@@@@@@@@@@@@@@@@@@@@@@@@@@@ +@@@@@@@@@@@@@@@@@@@@@@@@@@@@ +@@@@@@@@@@@@@@@@@@@@@@@@@@@@ +@@@@@@@@@@@@@@@@@@@@@@@@@@@@ +@@@@@@@@@@@@@@@@@@@@@@@@@@@@ +@@@@@@@@@@@@@@@@@@@@@@@@@@@@ +@@@@@@@@@%+-: =@@@@@@@@@@@@ +@@@@@@@%= -@@@**@@@@@@@ +@@@@@@@ :%#@-#@@@. #@@@@@@ +@@@@@@* +@@@@:*@@@ *@@@@@@ +@@@@@@# +@@@@ @@@% @@@@@@@ +@@@@@@@. :%@@.@@@. *@@@@@@@ +@@@@@@@@- =@@@@. -@@@@@@@@ +@@@@@@@@@%: +@- :@@@@@@@@@ +@@@@@@@@@@@%. : -@@@@@@@@@@ +@@@@@@@@@@@@@+ #@@@@@@@@@@ +@@@@@@@@@@@@@@+ :@@@@@@@@@@ +@@@@@@@@@@@@@@+ *@@@@@@@@@ +@@@@@@@@@@@@@@: = @@@@@@@@@ +@@@@@@@@@@@@@@ :@ @@@@@@@@@ +@@@@@@@@@@@@@@ -@ @@@@@@@@@ +@@@@@@@@@@@@@# +@ @@@@@@@@@ +@@@@@@@@@@@@@* ++ @@@@@@@@@ +@@@@@@@@@@@@@* *@@@@@@@@@ +@@@@@@@@@@@@@# =@@@@@@@@@@ +@@@@@@@@@@@@@@. +@@@@@@@@@@@ +@@@@@@@@@@@@@@@@@@@@@@@@@@@@ +@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + +0: +1: +2: +3: +4: +5: +6: +7: +8: ********** +9: +```` +The MNIST sample randomly selects an image of a numeral 0-9, which is then classified with the MNIST network using GIE. In this example, the network correctly recognized the image as #8. + diff --git a/caffeToGIE.h b/caffeToGIE.h new file mode 100644 index 000000000..12f9e75a1 --- /dev/null +++ b/caffeToGIE.h @@ -0,0 +1,86 @@ +/* + * inference-101 + */ + +#ifndef __GIE_CAFFE_H +#define __GIE_CAFFE_H + + +#include "Infer.h" +#include "caffeParser.h" +#include "logGIE.h" + + +/** + * Create an optimized GIE network from caffe prototxt and model file. + */ +bool caffeToGIEModel(const std::string& deployFile, // name for caffe prototxt + const std::string& modelFile, // name for model + const std::vector& outputs, // network outputs + unsigned int maxBatchSize, // batch size - NB must be at least as large as the batch we want to run with) + std::ostream& gieModelStream) // output stream for the GIE model +{ + // create API root class - must span the lifetime of the engine usage + nvinfer1::IBuilder* builder = createInferBuilder(gLogger); + nvinfer1::INetworkDefinition* network = builder->createNetwork(); + + // parse the caffe model to populate the network, then set the outputs + nvcaffeparser1::CaffeParser* parser = new nvcaffeparser1::CaffeParser; + + const bool useFp16 = builder->plaformHasFastFp16(); + printf(LOG_GIE "platform %s FP16 support.\n", useFp16 ? "has" : "does not have"); + printf(LOG_GIE "loading %s %s\n", deployFile.c_str(), modelFile.c_str()); + + nvinfer1::DataType modelDataType = useFp16 ? nvinfer1::DataType::kHALF : nvinfer1::DataType::kFLOAT; // create a 16-bit model if it's natively supported + const nvcaffeparser1::IBlobNameToTensor *blobNameToTensor = + parser->parse(deployFile.c_str(), // caffe deploy file + modelFile.c_str(), // caffe model file + *network, // network definition that the parser will populate + modelDataType); + + if( !blobNameToTensor ) + { + printf(LOG_GIE "failed to parse caffe network\n"); + return false; + } + + // the caffe file has no notion of outputs, so we need to manually say which tensors the engine should generate + const size_t num_outputs = outputs.size(); + + for( size_t n=0; n < num_outputs; n++ ) + network->markOutput(*blobNameToTensor->find(outputs[n].c_str())); + + + // Build the engine + printf(LOG_GIE "configuring CUDA engine\n"); + + builder->setMaxBatchSize(maxBatchSize); + builder->setMaxWorkspaceSize(16 << 20); + + // set up the network for paired-fp16 format, only on DriveCX + if(useFp16) + builder->setHalf2Mode(true); + + printf(LOG_GIE "building CUDA engine\n"); + nvinfer1::ICudaEngine* engine = builder->buildCudaEngine(*network); + + if( !engine ) + { + printf(LOG_GIE "failed to build CUDA engine\n"); + return false; + } + + // we don't need the network any more, and we can destroy the parser + network->destroy(); + delete parser; + + // serialize the engine, then close everything down + engine->serialize(gieModelStream); + engine->destroy(); + builder->destroy(); + + return true; +} + + +#endif diff --git a/camera/gst-camera/CMakeLists.txt b/camera/gst-camera/CMakeLists.txt new file mode 100644 index 000000000..533da4cbc --- /dev/null +++ b/camera/gst-camera/CMakeLists.txt @@ -0,0 +1,6 @@ + +file(GLOB gstCameraSources *.cpp) +file(GLOB gstCameraIncludes *.h ) + +add_executable(gst-camera ${gstCameraSources}) +target_link_libraries(gst-camera jetson-inference) diff --git a/camera/gst-camera/gst-camera.cpp b/camera/gst-camera/gst-camera.cpp new file mode 100644 index 000000000..134f3c516 --- /dev/null +++ b/camera/gst-camera/gst-camera.cpp @@ -0,0 +1,167 @@ +/* + * inference-101 + */ + +#include "gstCamera.h" + +#include "glDisplay.h" +#include "glTexture.h" + +#include +#include +#include + +#include "cudaNormalize.h" + + +bool signal_recieved = false; + +void sig_handler(int signo) +{ + if( signo == SIGINT ) + { + printf("received SIGINT\n"); + signal_recieved = true; + } +} + + +int main( int argc, char** argv ) +{ + printf("gst-camera\n args (%i): ", argc); + + for( int i=0; i < argc; i++ ) + printf("%i [%s] ", i, argv[i]); + + printf("\n"); + + + if( signal(SIGINT, sig_handler) == SIG_ERR ) + printf("\ncan't catch SIGINT\n"); + + /* + * create the camera device + */ + gstCamera* camera = gstCamera::Create(); + + if( !camera ) + { + printf("\ngst-camera: failed to initialize video device\n"); + return 0; + } + + printf("\ngst-camera: successfully initialized video device\n"); + printf(" width: %u\n", camera->GetWidth()); + printf(" height: %u\n", camera->GetHeight()); + printf(" depth: %u (bpp)\n", camera->GetPixelDepth()); + + + + /* + * create openGL window + */ + glDisplay* display = glDisplay::Create(); + + if( !display ) + printf("\ngst-camera: failed to create openGL display\n"); + + const size_t texSz = camera->GetWidth() * camera->GetHeight() * sizeof(float4); + float4* texIn = (float4*)malloc(texSz); + + /*if( texIn != NULL ) + memset(texIn, 0, texSz);*/ + + if( texIn != NULL ) + for( uint32_t y=0; y < camera->GetHeight(); y++ ) + for( uint32_t x=0; x < camera->GetWidth(); x++ ) + texIn[y*camera->GetWidth()+x] = make_float4(0.0f, 1.0f, 1.0f, 1.0f); + + glTexture* texture = glTexture::Create(camera->GetWidth(), camera->GetHeight(), GL_RGBA32F_ARB/*GL_RGBA8*/, texIn); + + if( !texture ) + printf("gst-camera: failed to create openGL texture\n"); + + + + /* + * start streaming + */ + if( !camera->Open() ) + { + printf("\ngst-camera: failed to open camera for streaming\n"); + return 0; + } + + printf("\ngst-camera: camera open for streaming\n"); + + + while( !signal_recieved ) + { + void* imgCPU = NULL; + void* imgCUDA = NULL; + + // get the latest frame + if( !camera->Capture(&imgCPU, &imgCUDA, 1000) ) + printf("\ngst-camera: failed to capture frame\n"); + else + printf("gst-camera: recieved new frame CPU=0x%p GPU=0x%p\n", imgCPU, imgCUDA); + + // convert from YUV to RGBA + void* imgRGBA = NULL; + + if( !camera->ConvertRGBA(imgCUDA, &imgRGBA) ) + printf("gst-camera: failed to convert from NV12 to RGBA\n"); + + // rescale image pixel intensities + CUDA(cudaNormalizeRGBA((float4*)imgRGBA, make_float2(0.0f, 255.0f), + (float4*)imgRGBA, make_float2(0.0f, 1.0f), + camera->GetWidth(), camera->GetHeight())); + + // update display + if( display != NULL ) + { + display->UserEvents(); + display->BeginRender(); + + if( texture != NULL ) + { + void* tex_map = texture->MapCUDA(); + + if( tex_map != NULL ) + { + cudaMemcpy(tex_map, imgRGBA, texture->GetSize(), cudaMemcpyDeviceToDevice); + CUDA(cudaDeviceSynchronize()); + + texture->Unmap(); + } + //texture->UploadCPU(texIn); + + texture->Render(100,100); + } + + display->EndRender(); + } + } + + printf("\ngst-camera: un-initializing video device\n"); + + + /* + * shutdown the camera device + */ + if( camera != NULL ) + { + delete camera; + camera = NULL; + } + + if( display != NULL ) + { + delete display; + display = NULL; + } + + printf("gst-camera: video device has been un-initialized.\n"); + printf("gst-camera: this concludes the test of the video device.\n"); + return 0; +} diff --git a/camera/gstCamera.cpp b/camera/gstCamera.cpp new file mode 100644 index 000000000..31f2f00f9 --- /dev/null +++ b/camera/gstCamera.cpp @@ -0,0 +1,442 @@ +/* + * inference-101 + */ + +#include "gstCamera.h" +#include "gstUtility.h" + +#include +#include + +#include +#include +#include + +#include +#include + +#include "cudaMappedMemory.h" +#include "cudaYUV.h" + + + + +// constructor +gstCamera::gstCamera() +{ + mAppSink = NULL; + mBus = NULL; + mPipeline = NULL; + mRGBA = NULL; + + mWidth = 0; + mHeight = 0; + mDepth = 0; + mSize = 0; + + mWaitEvent = new QWaitCondition(); + mWaitMutex = new QMutex(); + mRingMutex = new QMutex(); + + mLatestRingbuffer = 0; + mLatestRetrieved = false; + + for( uint32_t n=0; n < NUM_RINGBUFFERS; n++ ) + { + mRingbufferCPU[n] = NULL; + mRingbufferGPU[n] = NULL; + } +} + + +// destructor +gstCamera::~gstCamera() +{ + +} + + +// ConvertRGBA +bool gstCamera::ConvertRGBA( void* input, void** output ) +{ + if( !input || !output ) + return false; + + if( !mRGBA ) + { + if( CUDA_FAILED(cudaMalloc(&mRGBA, mWidth * mHeight * sizeof(float4))) ) + { + printf(LOG_CUDA "gstCamera -- failed to allocate memory for %ux%u RGBA texture\n", mWidth, mHeight); + return false; + } + } + + if( CUDA_FAILED(cudaNV12ToRGBAf((uint8_t*)input, (float4*)mRGBA, mWidth, mHeight)) ) + return false; + + *output = mRGBA; + return true; +} + + +// onEOS +void gstCamera::onEOS(_GstAppSink* sink, void* user_data) +{ + printf(LOG_GSTREAMER "gstreamer decoder onEOS\n"); +} + + +// onPreroll +GstFlowReturn gstCamera::onPreroll(_GstAppSink* sink, void* user_data) +{ + printf(LOG_GSTREAMER "gstreamer decoder onPreroll\n"); + return GST_FLOW_OK; +} + + +// onBuffer +GstFlowReturn gstCamera::onBuffer(_GstAppSink* sink, void* user_data) +{ + //printf(LOG_GSTREAMER "gstreamer decoder onBuffer\n"); + + if( !user_data ) + return GST_FLOW_OK; + + gstCamera* dec = (gstCamera*)user_data; + + dec->checkBuffer(); + dec->checkMsgBus(); + return GST_FLOW_OK; +} + + +// Capture +bool gstCamera::Capture( void** cpu, void** cuda, unsigned long timeout ) +{ + mWaitMutex->lock(); + const bool wait_result = mWaitEvent->wait(mWaitMutex, timeout); + mWaitMutex->unlock(); + + if( !wait_result ) + return false; + + mRingMutex->lock(); + const uint32_t latest = mLatestRingbuffer; + const bool retrieved = mLatestRetrieved; + mLatestRetrieved = true; + mRingMutex->unlock(); + + // skip if it was already retrieved + if( retrieved ) + return false; + + if( cpu != NULL ) + *cpu = mRingbufferCPU[latest]; + + if( cuda != NULL ) + *cuda = mRingbufferGPU[latest]; + + return true; +} + + +#define release_return { gst_sample_unref(gstSample); return; } + + +// checkBuffer +void gstCamera::checkBuffer() +{ + if( !mAppSink ) + return; + + // block waiting for the buffer + GstSample* gstSample = gst_app_sink_pull_sample(mAppSink); + + if( !gstSample ) + { + printf(LOG_GSTREAMER "gstreamer camera -- gst_app_sink_pull_sample() returned NULL...\n"); + return; + } + + GstBuffer* gstBuffer = gst_sample_get_buffer(gstSample); + + if( !gstBuffer ) + { + printf(LOG_GSTREAMER "gstreamer camera -- gst_sample_get_buffer() returned NULL...\n"); + return; + } + + // retrieve + GstMapInfo map; + + if( !gst_buffer_map(gstBuffer, &map, GST_MAP_READ) ) + { + printf(LOG_GSTREAMER "gstreamer camera -- gst_buffer_map() failed...\n"); + return; + } + + //gst_util_dump_mem(map.data, map.size); + + void* gstData = map.data; //GST_BUFFER_DATA(gstBuffer); + const uint32_t gstSize = map.size; //GST_BUFFER_SIZE(gstBuffer); + + if( !gstData ) + { + printf(LOG_GSTREAMER "gstreamer camera -- gst_buffer had NULL data pointer...\n"); + release_return; + } + + // retrieve caps + GstCaps* gstCaps = gst_sample_get_caps(gstSample); + + if( !gstCaps ) + { + printf(LOG_GSTREAMER "gstreamer camera -- gst_buffer had NULL caps...\n"); + release_return; + } + + GstStructure* gstCapsStruct = gst_caps_get_structure(gstCaps, 0); + + if( !gstCapsStruct ) + { + printf(LOG_GSTREAMER "gstreamer camera -- gst_caps had NULL structure...\n"); + release_return; + } + + // get width & height of the buffer + int width = 0; + int height = 0; + + if( !gst_structure_get_int(gstCapsStruct, "width", &width) || + !gst_structure_get_int(gstCapsStruct, "height", &height) ) + { + printf(LOG_GSTREAMER "gstreamer camera -- gst_caps missing width/height...\n"); + release_return; + } + + if( width < 1 || height < 1 ) + release_return; + + mWidth = width; + mHeight = height; + mDepth = (gstSize * 8) / (width * height); + mSize = gstSize; + + //printf(LOG_GSTREAMER "gstreamer camera recieved %ix%i frame (%u bytes, %u bpp)\n", width, height, gstSize, mDepth); + + // make sure ringbuffer is allocated + if( !mRingbufferCPU[0] ) + { + for( uint32_t n=0; n < NUM_RINGBUFFERS; n++ ) + { + if( !cudaAllocMapped(&mRingbufferCPU[n], &mRingbufferGPU[n], gstSize) ) + printf(LOG_CUDA "gstreamer camera -- failed to allocate ringbuffer %u (size=%u)\n", n, gstSize); + } + + printf(LOG_CUDA "gstreamer camera -- allocated %u ringbuffers, %u bytes each\n", NUM_RINGBUFFERS, gstSize); + } + + // copy to next ringbuffer + const uint32_t nextRingbuffer = (mLatestRingbuffer + 1) % NUM_RINGBUFFERS; + + //printf(LOG_GSTREAMER "gstreamer camera -- using ringbuffer #%u for next frame\n", nextRingbuffer); + memcpy(mRingbufferCPU[nextRingbuffer], gstData, gstSize); + gst_buffer_unmap(gstBuffer, &map); + //gst_buffer_unref(gstBuffer); + gst_sample_unref(gstSample); + + + // update and signal sleeping threads + mRingMutex->lock(); + mLatestRingbuffer = nextRingbuffer; + mLatestRetrieved = false; + mRingMutex->unlock(); + mWaitEvent->wakeAll(); +} + + + +// buildLaunchStr +bool gstCamera::buildLaunchStr() +{ + // gst-launch-1.0 nvcamerasrc fpsRange="30.0 30.0" ! 'video/x-raw(memory:NVMM), width=(int)1920, height=(int)1080, format=(string)I420, framerate=(fraction)30/1' ! \ + // nvvidconv flip-method=2 ! 'video/x-raw(memory:NVMM), format=(string)I420' ! fakesink silent=false -v + std::ostringstream ss; + +//#define CAPS_STR "video/x-raw(memory:NVMM), width=(int)2592, height=(int)1944, format=(string)I420, framerate=(fraction)30/1" +//#define CAPS_STR "video/x-raw(memory:NVMM), width=(int)1920, height=(int)1080, format=(string)I420, framerate=(fraction)30/1" + mWidth = 1280; + mHeight = 720; + mDepth = 12; + mSize = (mWidth * mHeight * mDepth) / 8; + + ss << "nvcamerasrc fpsRange=\"30.0 30.0\" ! video/x-raw(memory:NVMM), width=(int)" << mWidth << ", height=(int)" << mHeight << ", format=(string)NV12 ! nvvidconv flip-method=2 ! "; //'video/x-raw(memory:NVMM), width=(int)1920, height=(int)1080, format=(string)I420, framerate=(fraction)30/1' ! "; + ss << "video/x-raw ! appsink name=mysink"; + + mLaunchStr = ss.str(); + + printf(LOG_GSTREAMER "gstreamer decoder pipeline string:\n"); + printf("%s\n", mLaunchStr.c_str()); + return true; +} + + +// Create +gstCamera* gstCamera::Create() +{ + if( !gstreamerInit() ) + { + printf(LOG_GSTREAMER "failed to initialize gstreamer API\n"); + return NULL; + } + + gstCamera* cam = new gstCamera(); + + if( !cam ) + return NULL; + + if( !cam->init() ) + { + printf(LOG_GSTREAMER "failed to init gstCamera\n"); + return NULL; + } + + return cam; +} + + +// init +bool gstCamera::init() +{ + GError* err = NULL; + + // build pipeline string + if( !buildLaunchStr() ) + { + printf(LOG_GSTREAMER "gstreamer decoder failed to build pipeline string\n"); + return false; + } + + // launch pipeline + mPipeline = gst_parse_launch(mLaunchStr.c_str(), &err); + + if( err != NULL ) + { + printf(LOG_GSTREAMER "gstreamer decoder failed to create pipeline\n"); + printf(LOG_GSTREAMER " (%s)\n", err->message); + g_error_free(err); + return false; + } + + GstPipeline* pipeline = GST_PIPELINE(mPipeline); + + if( !pipeline ) + { + printf(LOG_GSTREAMER "gstreamer failed to cast GstElement into GstPipeline\n"); + return false; + } + + // retrieve pipeline bus + /*GstBus**/ mBus = gst_pipeline_get_bus(pipeline); + + if( !mBus ) + { + printf(LOG_GSTREAMER "gstreamer failed to retrieve GstBus from pipeline\n"); + return false; + } + + // add watch for messages (disabled when we poll the bus ourselves, instead of gmainloop) + //gst_bus_add_watch(mBus, (GstBusFunc)gst_message_print, NULL); + + // get the appsrc + GstElement* appsinkElement = gst_bin_get_by_name(GST_BIN(pipeline), "mysink"); + GstAppSink* appsink = GST_APP_SINK(appsinkElement); + + if( !appsinkElement || !appsink) + { + printf(LOG_GSTREAMER "gstreamer failed to retrieve AppSink element from pipeline\n"); + return false; + } + + mAppSink = appsink; + + // setup callbacks + GstAppSinkCallbacks cb; + memset(&cb, 0, sizeof(GstAppSinkCallbacks)); + + cb.eos = onEOS; + cb.new_preroll = onPreroll; + cb.new_sample = onBuffer; + + gst_app_sink_set_callbacks(mAppSink, &cb, (void*)this, NULL); + + return true; +} + + +// Open +bool gstCamera::Open() +{ + // transition pipline to STATE_PLAYING + printf(LOG_GSTREAMER "gstreamer transitioning pipeline to GST_STATE_PLAYING\n"); + + const GstStateChangeReturn result = gst_element_set_state(mPipeline, GST_STATE_PLAYING); + + if( result == GST_STATE_CHANGE_ASYNC ) + { +#if 0 + GstMessage* asyncMsg = gst_bus_timed_pop_filtered(mBus, 5 * GST_SECOND, + (GstMessageType)(GST_MESSAGE_ASYNC_DONE|GST_MESSAGE_ERROR)); + + if( asyncMsg != NULL ) + { + gst_message_print(mBus, asyncMsg, this); + gst_message_unref(asyncMsg); + } + else + printf(LOG_GSTREAMER "gstreamer NULL message after transitioning pipeline to PLAYING...\n"); +#endif + } + else if( result != GST_STATE_CHANGE_SUCCESS ) + { + printf(LOG_GSTREAMER "gstreamer failed to set pipeline state to PLAYING (error %u)\n", result); + return false; + } + + checkMsgBus(); + usleep(100*1000); + checkMsgBus(); + + return true; +} + + +// Close +void gstCamera::Close() +{ + // stop pipeline + printf(LOG_GSTREAMER "gstreamer transitioning pipeline to GST_STATE_NULL\n"); + + const GstStateChangeReturn result = gst_element_set_state(mPipeline, GST_STATE_NULL); + + if( result != GST_STATE_CHANGE_SUCCESS ) + printf(LOG_GSTREAMER "gstreamer failed to set pipeline state to PLAYING (error %u)\n", result); + + usleep(250*1000); +} + + +// checkMsgBus +void gstCamera::checkMsgBus() +{ + while(true) + { + GstMessage* msg = gst_bus_pop(mBus); + + if( !msg ) + break; + + gst_message_print(mBus, msg, this); + gst_message_unref(msg); + } +} diff --git a/camera/gstCamera.h b/camera/gstCamera.h new file mode 100644 index 000000000..bb6f07805 --- /dev/null +++ b/camera/gstCamera.h @@ -0,0 +1,79 @@ +/* + * inference-101 + */ + +#ifndef __GSTREAMER_CAMERA_H__ +#define __GSTREAMER_CAMERA_H__ + +#include +#include + + +struct _GstAppSink; +class QWaitCondition; +class QMutex; + + +/** + * gstreamer CSI camera using nvcamerasrc + */ +class gstCamera +{ +public: + static gstCamera* Create(); + ~gstCamera(); + + bool Open(); + void Close(); + + // Capture YUV (NV12) + bool Capture( void** cpu, void** cuda, unsigned long timeout=ULONG_MAX ); + + // Takes in captured YUV-NV12 CUDA image, converts to float4 RGBA (with pixel intensity 0-255) + bool ConvertRGBA( void* input, void** output ); + + inline uint32_t GetWidth() const { return mWidth; } + inline uint32_t GetHeight() const { return mHeight; } + inline uint32_t GetPixelDepth() const { return mDepth; } + inline uint32_t GetSize() const { return mSize; } + +private: + static void onEOS(_GstAppSink* sink, void* user_data); + static GstFlowReturn onPreroll(_GstAppSink* sink, void* user_data); + static GstFlowReturn onBuffer(_GstAppSink* sink, void* user_data); + + gstCamera(); + + bool init(); + bool buildLaunchStr(); + void checkMsgBus(); + void checkBuffer(); + + _GstBus* mBus; + _GstAppSink* mAppSink; + _GstElement* mPipeline; + + std::string mLaunchStr; + + uint32_t mWidth; + uint32_t mHeight; + uint32_t mDepth; + uint32_t mSize; + + static const uint32_t NUM_RINGBUFFERS = 4; + + void* mRingbufferCPU[NUM_RINGBUFFERS]; + void* mRingbufferGPU[NUM_RINGBUFFERS]; + + QWaitCondition* mWaitEvent; + + QMutex* mWaitMutex; + QMutex* mRingMutex; + + uint32_t mLatestRingbuffer; + bool mLatestRetrieved; + + void* mRGBA; +}; + +#endif diff --git a/camera/gstUtility.cpp b/camera/gstUtility.cpp new file mode 100644 index 000000000..752760d27 --- /dev/null +++ b/camera/gstUtility.cpp @@ -0,0 +1,222 @@ +/* + * inference-101 + */ + +#include "gstUtility.h" + +#include +#include +#include + + +inline const char* gst_debug_level_str( GstDebugLevel level ) +{ + switch (level) + { + case GST_LEVEL_NONE: return "GST_LEVEL_NONE "; + case GST_LEVEL_ERROR: return "GST_LEVEL_ERROR "; + case GST_LEVEL_WARNING: return "GST_LEVEL_WARNING"; + case GST_LEVEL_INFO: return "GST_LEVEL_INFO "; + case GST_LEVEL_DEBUG: return "GST_LEVEL_DEBUG "; + case GST_LEVEL_LOG: return "GST_LEVEL_LOG "; + case GST_LEVEL_FIXME: return "GST_LEVEL_FIXME "; +#ifdef GST_LEVEL_TRACE + case GST_LEVEL_TRACE: return "GST_LEVEL_TRACE "; +#endif + case GST_LEVEL_MEMDUMP: return "GST_LEVEL_MEMDUMP"; + default: return " "; + } +} + +#define SEP " " + +void rilog_debug_function(GstDebugCategory* category, GstDebugLevel level, + const gchar* file, const char* function, + gint line, GObject* object, GstDebugMessage* message, + gpointer data) +{ + if( level > GST_LEVEL_WARNING /*GST_LEVEL_INFO*/ ) + return; + + //gchar* name = NULL; + //if( object != NULL ) + // g_object_get(object, "name", &name, NULL); + + const char* typeName = " "; + const char* className = " "; + + if( object != NULL ) + { + typeName = G_OBJECT_TYPE_NAME(object); + className = G_OBJECT_CLASS_NAME(object); + } + + printf(LOG_GSTREAMER "%s %s %s\n" SEP "%s:%i %s\n" SEP "%s\n", + gst_debug_level_str(level), typeName, + gst_debug_category_get_name(category), file, line, function, + gst_debug_message_get(message)); + +} + + +bool gstreamerInit() +{ + int argc = 0; + //char* argv[] = { "none" }; + + if( !gst_init_check(&argc, NULL, NULL) ) + { + printf(LOG_GSTREAMER "failed to initialize gstreamer library with gst_init()\n"); + return false; + } + + uint32_t ver[] = { 0, 0, 0, 0 }; + gst_version( &ver[0], &ver[1], &ver[2], &ver[3] ); + + printf(LOG_GSTREAMER "initialized gstreamer, version %u.%u.%u.%u\n", ver[0], ver[1], ver[2], ver[3]); + + + // debugging + gst_debug_remove_log_function(gst_debug_log_default); + + if( true ) + { + gst_debug_add_log_function(rilog_debug_function, NULL, NULL); + + gst_debug_set_active(true); + gst_debug_set_colored(false); + } + + return true; +} +//--------------------------------------------------------------------------------------------- + +static void gst_print_one_tag(const GstTagList * list, const gchar * tag, gpointer user_data) +{ + int i, num; + + num = gst_tag_list_get_tag_size (list, tag); + for (i = 0; i < num; ++i) { + const GValue *val; + + /* Note: when looking for specific tags, use the gst_tag_list_get_xyz() API, + * we only use the GValue approach here because it is more generic */ + val = gst_tag_list_get_value_index (list, tag, i); + if (G_VALUE_HOLDS_STRING (val)) { + printf("\t%20s : %s\n", tag, g_value_get_string (val)); + } else if (G_VALUE_HOLDS_UINT (val)) { + printf("\t%20s : %u\n", tag, g_value_get_uint (val)); + } else if (G_VALUE_HOLDS_DOUBLE (val)) { + printf("\t%20s : %g\n", tag, g_value_get_double (val)); + } else if (G_VALUE_HOLDS_BOOLEAN (val)) { + printf("\t%20s : %s\n", tag, + (g_value_get_boolean (val)) ? "true" : "false"); + } else if (GST_VALUE_HOLDS_BUFFER (val)) { + //GstBuffer *buf = gst_value_get_buffer (val); + //guint buffer_size = GST_BUFFER_SIZE(buf); + + printf("\t%20s : buffer of size %u\n", tag, /*buffer_size*/0); + } /*else if (GST_VALUE_HOLDS_DATE_TIME (val)) { + GstDateTime *dt = (GstDateTime*)g_value_get_boxed (val); + gchar *dt_str = gst_date_time_to_iso8601_string (dt); + + printf("\t%20s : %s\n", tag, dt_str); + g_free (dt_str); + }*/ else { + printf("\t%20s : tag of type '%s'\n", tag, G_VALUE_TYPE_NAME (val)); + } + } +} + +static const char* gst_stream_status_string( GstStreamStatusType status ) +{ + switch(status) + { + case GST_STREAM_STATUS_TYPE_CREATE: return "CREATE"; + case GST_STREAM_STATUS_TYPE_ENTER: return "ENTER"; + case GST_STREAM_STATUS_TYPE_LEAVE: return "LEAVE"; + case GST_STREAM_STATUS_TYPE_DESTROY: return "DESTROY"; + case GST_STREAM_STATUS_TYPE_START: return "START"; + case GST_STREAM_STATUS_TYPE_PAUSE: return "PAUSE"; + case GST_STREAM_STATUS_TYPE_STOP: return "STOP"; + default: return "UNKNOWN"; + } +} + +// gst_message_print +gboolean gst_message_print(GstBus* bus, GstMessage* message, gpointer user_data) +{ + + switch (GST_MESSAGE_TYPE (message)) + { + case GST_MESSAGE_ERROR: + { + GError *err = NULL; + gchar *dbg_info = NULL; + + gst_message_parse_error (message, &err, &dbg_info); + printf(LOG_GSTREAMER "gstreamer %s ERROR %s\n", GST_OBJECT_NAME (message->src), err->message); + printf(LOG_GSTREAMER "gstreamer Debugging info: %s\n", (dbg_info) ? dbg_info : "none"); + + g_error_free(err); + g_free(dbg_info); + //g_main_loop_quit (app->loop); + break; + } + case GST_MESSAGE_EOS: + { + printf(LOG_GSTREAMER "gstreamer %s recieved EOS signal...\n", GST_OBJECT_NAME(message->src)); + //g_main_loop_quit (app->loop); // TODO trigger plugin Close() upon error + break; + } + case GST_MESSAGE_STATE_CHANGED: + { + GstState old_state, new_state; + + gst_message_parse_state_changed(message, &old_state, &new_state, NULL); + + printf(LOG_GSTREAMER "gstreamer changed state from %s to %s ==> %s\n", + gst_element_state_get_name(old_state), + gst_element_state_get_name(new_state), + GST_OBJECT_NAME(message->src)); + break; + } + case GST_MESSAGE_STREAM_STATUS: + { + GstStreamStatusType streamStatus; + gst_message_parse_stream_status(message, &streamStatus, NULL); + + printf(LOG_GSTREAMER "gstreamer stream status %s ==> %s\n", + gst_stream_status_string(streamStatus), + GST_OBJECT_NAME(message->src)); + break; + } + case GST_MESSAGE_TAG: + { + GstTagList *tags = NULL; + + gst_message_parse_tag(message, &tags); + +#ifdef gst_tag_list_to_string + gchar* txt = gst_tag_list_to_string(tags); +#else + gchar* txt = "missing gst_tag_list_to_string()"; +#endif + + printf(LOG_GSTREAMER "gstreamer %s %s\n", GST_OBJECT_NAME(message->src), txt); + + g_free(txt); + //gst_tag_list_foreach(tags, gst_print_one_tag, NULL); + gst_tag_list_free(tags); + break; + } + default: + { + printf(LOG_GSTREAMER "gstreamer msg %s ==> %s\n", gst_message_type_get_name(GST_MESSAGE_TYPE(message)), GST_OBJECT_NAME(message->src)); + break; + } + } + + return TRUE; +} + diff --git a/camera/gstUtility.h b/camera/gstUtility.h new file mode 100644 index 000000000..ec7267fb0 --- /dev/null +++ b/camera/gstUtility.h @@ -0,0 +1,25 @@ +/* + * inference-101 + */ + +#ifndef __GSTREAMER_UTILITY_H__ +#define __GSTREAMER_UTILITY_H__ + + +#include + + +/** + * LOG_GSTREAMER printf prefix + */ +#define LOG_GSTREAMER "[gstreamer] " + + +bool gstreamerInit(); + +gboolean gst_message_print(_GstBus* bus, _GstMessage* message, void* user_data); + + + +#endif + diff --git a/camera/v4l2-console/CMakeLists.txt b/camera/v4l2-console/CMakeLists.txt new file mode 100644 index 000000000..28752c855 --- /dev/null +++ b/camera/v4l2-console/CMakeLists.txt @@ -0,0 +1,6 @@ + +file(GLOB v4l2ConsoleSources *.cpp) +file(GLOB v4l2ConsoleIncludes *.h ) + +add_executable(v4l2-console ${v4l2ConsoleSources}) +target_link_libraries(v4l2-console jetson-inference) diff --git a/camera/v4l2-console/v4l2-console.cpp b/camera/v4l2-console/v4l2-console.cpp new file mode 100644 index 000000000..16c1a2850 --- /dev/null +++ b/camera/v4l2-console/v4l2-console.cpp @@ -0,0 +1,137 @@ +/* + * inference-101 + */ + +#include "v4l2Camera.h" + +#include +#include +//#include +#include + + +bool signal_recieved = false; + +void sig_handler(int signo) +{ + if( signo == SIGINT ) + { + printf("received SIGINT\n"); + signal_recieved = true; + } +} + + + +int main( int argc, char** argv ) +{ + printf("v4l2-console\n args (%i): ", argc); + + /* + * verify parameters + */ + for( int i=0; i < argc; i++ ) + printf("%i [%s] ", i, argv[i]); + + printf("\n"); + + if( argc < 2 ) + { + printf("v4l2-console: 0 arguments were supplied.\n"); + printf("usage: v4l2-console \n"); + printf(" ./v4l2-console /dev/video0\n"); + + return 0; + } + + const char* dev_path = argv[1]; + printf("v4l2-console: attempting to initialize video device '%s'\n\n", dev_path); + + if( signal(SIGINT, sig_handler) == SIG_ERR ) + printf("\ncan't catch SIGINT\n"); + + /* + * create the camera device + */ + v4l2Camera* camera = v4l2Camera::Create(dev_path); + + if( !camera ) + { + printf("\nv4l2-console: failed to initialize video device '%s'\n", dev_path); + return 0; + } + + printf("\nv4l2-console: successfully initialized video device '%s'\n", dev_path); + printf(" width: %u\n", camera->GetWidth()); + printf(" height: %u\n", camera->GetHeight()); + printf(" depth: %u (bpp)\n", camera->GetPixelDepth()); + + + /* + * start streaming + */ + if( !camera->Open() ) + { + printf("\nv4l2-console: failed to open camera '%s' for streaming\n", dev_path); + return 0; + } + + printf("\nv4l2-console: camera '%s' open for streaming\n", dev_path); + + + while( !signal_recieved ) + { + uint8_t* img = (uint8_t*)camera->Capture(500); + + if( !img ) + { + //printf("got NULL image from camera capture\n"); + continue; + } + else + { + printf("recieved new video frame\n"); + + static int num_frames = 0; + + const int width = camera->GetWidth(); + const int height = camera->GetHeight(); + + QImage qImg(width, height, QImage::Format_RGB32); + + for( int y=0; y < height; y++ ) + { + for( int x=0; x < width; x++ ) + { + const int value = img[y * width + x]; + if( value != 0 ) + printf("%i %i %i\n", x, y, value); + qImg.setPixel(x, y, qRgb(value, value, value)); + } + } + + char output_filename[64]; + sprintf(output_filename, "camera-%u.jpg", num_frames); + + qImg.save(QString(output_filename)); + num_frames++; + } + + } + + printf("\nv4l2-console: un-initializing video device '%s'\n", dev_path); + + + /* + * shutdown the camera device + */ + if( camera != NULL ) + { + delete camera; + camera = NULL; + } + + printf("v4l2-console: video device '%s' has been un-initialized.\n", dev_path); + printf("v4l2-console: this concludes the test of video device '%s'\n", dev_path); + return 0; +} \ No newline at end of file diff --git a/camera/v4l2-display/CMakeLists.txt b/camera/v4l2-display/CMakeLists.txt new file mode 100644 index 000000000..559cde71b --- /dev/null +++ b/camera/v4l2-display/CMakeLists.txt @@ -0,0 +1,6 @@ + +file(GLOB v4l2DisplaySources *.cpp) +file(GLOB v4l2DisplayIncludes *.h ) + +add_executable(v4l2-display ${v4l2DisplaySources}) +target_link_libraries(v4l2-display jetson-inference) diff --git a/camera/v4l2-display/v4l2-display.cpp b/camera/v4l2-display/v4l2-display.cpp new file mode 100644 index 000000000..92d20d932 --- /dev/null +++ b/camera/v4l2-display/v4l2-display.cpp @@ -0,0 +1,98 @@ +/* + * inference-101 + */ + +#include "v4l2Camera.h" +#include "glDisplay.h" +#include "cudaMappedMemory.h" + +#include + + +int main( int argc, char** argv ) +{ + printf("v4l2-display\n args (%i): ", argc); + + /* + * verify parameters + */ + for( int i=0; i < argc; i++ ) + printf("%i [%s] ", i, argv[i]); + + printf("\n"); + + if( argc < 2 ) + { + printf("v4l2-display: 0 arguments were supplied.\n"); + printf("usage: v4l2-display \n"); + printf(" ./v4l2-display /dev/video0\n"); + + return 0; + } + + const char* dev_path = argv[1]; + printf("v4l2-display: attempting to initialize video device '%s'\n\n", dev_path); + + + /* + * create the camera device + */ + v4l2Camera* camera = v4l2Camera::Create(dev_path); + + if( !camera ) + { + printf("\nv4l2-display: failed to initialize video device '%s'\n", dev_path); + return 0; + } + + printf("\nv4l2-display: successfully initialized video device '%s'\n", dev_path); + printf(" width: %u\n", camera->GetWidth()); + printf(" height: %u\n", camera->GetHeight()); + printf(" depth: %u (bpp)\n", camera->GetPixelDepth()); + + printf("\nv4l2-display: un-initializing video device '%s'\n", dev_path); + + + /* + * create openGL window + */ + glDisplay* display = glDisplay::Create(); + + if( !display ) + { + printf("\nv4l2-display: failed to create openGL display\n"); + return 0; + } + + glTexture* tex = glTexture::Create(camera->GetWidth(), camera->GetHeight(), GL_LUMINANCE8); + + if( !tex ) + { + printf("v4l2-display: failed to create %ux%u openGL texture\n", camera->GetWidth(), camera->GetHeight()); + return 0; + } + + printf("v4l2-display: initialized %u x %u openGL texture (%u bytes)\n", tex->GetWidth(), tex->GetHeight(), tex->GetSize()); + + + + + /* + * shutdown + */ + if( display != NULL ) + { + delete display; + display = NULL; + } + + if( camera != NULL ) + { + delete camera; + camera = NULL; + } + + printf("v4l2-display: video device '%s' has been un-initialized.\n", dev_path); + printf("v4l2-display: this concludes the test of video device '%s'\n", dev_path); + return 0; +} \ No newline at end of file diff --git a/camera/v4l2Camera.cpp b/camera/v4l2Camera.cpp new file mode 100644 index 000000000..dddacd24a --- /dev/null +++ b/camera/v4l2Camera.cpp @@ -0,0 +1,481 @@ +/* + * inference-101 + */ + +#include "v4l2Camera.h" + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + + + +#define REQUESTED_RINGBUFFERS 4 + + + +// ioctl +static int xioctl(int fd, int request, void* arg) +{ + int status; + do { status = ioctl (fd, request, arg); } while (-1==status && EINTR==errno); + return status; +} + + + +// constructor +v4l2Camera::v4l2Camera( const char* device_path ) : mDevicePath(device_path) +{ + mFD = -1; + + mBuffersMMap = NULL; + mBufferCountMMap = 0; + mRequestWidth = 0; + mRequestHeight = 0; + mRequestFormat = 1; + //mRequestFormat = -1; // index into V4L2 format table + + mWidth = 0; + mHeight = 0; + mPitch = 0; + mPixelDepth = 0; +} + + +// destructor +v4l2Camera::~v4l2Camera() +{ + // close file + if( mFD >= 0 ) + { + close(mFD); + mFD = -1; + } +} + + +// ProcessEmit +void* v4l2Camera::Capture( size_t timeout ) +{ + fd_set fds; + FD_ZERO(&fds); + FD_SET(mFD, &fds); + + struct timeval tv; + + tv.tv_sec = 0; + tv.tv_usec = 0; + + const bool threaded = true; //false; + + if( timeout > 0 ) + { + tv.tv_sec = timeout / 1000; + tv.tv_usec = (timeout - (tv.tv_sec * 1000)) * 1000; + } + + // + const int result = select(mFD + 1, &fds, NULL, NULL, &tv); + + + if( result == -1 ) + { + //if (EINTR == errno) + printf("v4l2 -- select() failed (errno=%i) (%s)\n", errno, strerror(errno)); + return NULL; + } + else if( result == 0 ) + { + if( timeout > 0 ) + printf("v4l2 -- select() timed out...\n"); + return NULL; // timeout, not necessarily an error (TRY_AGAIN) + } + + // dequeue input buffer from V4L2 + struct v4l2_buffer buf; + memset(&buf, 0, sizeof(v4l2_buffer)); + + buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + buf.memory = V4L2_MEMORY_MMAP; //V4L2_MEMORY_USERPTR; + + if( xioctl(mFD, VIDIOC_DQBUF, &buf) < 0 ) + { + printf("v4l2 -- ioctl(VIDIOC_DQBUF) failed (errno=%i) (%s)\n", errno, strerror(errno)); + return NULL; + } + + if( buf.index >= mBufferCountMMap ) + { + printf("v4l2 -- invalid mmap buffer index (%u)\n", buf.index); + return NULL; + } + + // emit ringbuffer entry + //printf("v4l2 -- recieved %ux%u video frame (index=%u)\n", mWidth, mHeight, (uint32_t)buf.index); + + void* image_ptr = mBuffersMMap[buf.index].ptr; + + // re-queue buffer to V4L2 + if( xioctl(mFD, VIDIOC_QBUF, &buf) < 0 ) + printf("v4l2 -- ioctl(VIDIOC_QBUF) failed (errno=%i) (%s)\n", errno, strerror(errno)); + + return image_ptr; +} + + + +// initMMap +bool v4l2Camera::initMMap() +{ + struct v4l2_requestbuffers req; + memset(&req, 0, sizeof(v4l2_requestbuffers)); + + req.count = REQUESTED_RINGBUFFERS; + req.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + req.memory = V4L2_MEMORY_MMAP; + + if( xioctl(mFD, VIDIOC_REQBUFS, &req) < 0 ) + { + printf("v4l2 -- does not support mmap (errno=%i) (%s)\n", errno, strerror(errno)); + return false; + } + + if( req.count < 2 ) + { + printf("v4l2 -- insufficient mmap memory\n"); + return false; + } + + mBuffersMMap = (v4l2_mmap*)malloc( req.count * sizeof(v4l2_mmap) ); + + if( !mBuffersMMap ) + return false; + + memset(mBuffersMMap, 0, req.count * sizeof(v4l2_mmap)); + + for( size_t n=0; n < req.count; n++ ) + { + mBuffersMMap[n].buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + mBuffersMMap[n].buf.memory = V4L2_MEMORY_MMAP; + mBuffersMMap[n].buf.index = n; + + if( xioctl(mFD, VIDIOC_QUERYBUF, &mBuffersMMap[n].buf) < 0 ) + { + printf( "v4l2 -- failed retrieve mmap buffer info (errno=%i) (%s)\n", errno, strerror(errno)); + return false; + } + + mBuffersMMap[n].ptr = mmap(NULL, mBuffersMMap[n].buf.length, + PROT_READ|PROT_WRITE, MAP_SHARED, + mFD, mBuffersMMap[n].buf.m.offset); + + if( mBuffersMMap[n].ptr == MAP_FAILED ) + { + printf( "v4l2 -- failed to mmap buffer (errno=%i) (%s)\n", errno, strerror(errno)); + return false; + } + + if( xioctl(mFD, VIDIOC_QBUF, &mBuffersMMap[n].buf) < 0 ) + { + printf( "v4l2 -- failed to queue mmap buffer (errno=%i) (%s)\n", errno, strerror(errno)); + return false; + } + } + + mBufferCountMMap = req.count; + printf("v4l2 -- mapped %zu capture buffers with mmap\n", mBufferCountMMap); + return true; +} + + +inline const char* v4l2_format_str( uint32_t fmt ) +{ + if( fmt == V4L2_PIX_FMT_SBGGR8 ) return "SBGGR8 (V4L2_PIX_FMT_SBGGR8)"; + else if( fmt == V4L2_PIX_FMT_SGBRG8 ) return "SGBRG8 (V4L2_PIX_FMT_SGBRG8)"; + else if( fmt == V4L2_PIX_FMT_SGRBG8 ) return "SGRBG8 (V4L2_PIX_FMT_SGRBG8)"; + else if( fmt == V4L2_PIX_FMT_SRGGB8 ) return "SRGGB8 (V4L2_PIX_FMT_SRGGB8)"; + else if( fmt == V4L2_PIX_FMT_SBGGR16 ) return "BYR2 (V4L2_PIX_FMT_SBGGR16)"; + else if( fmt == V4L2_PIX_FMT_SRGGB10 ) return "RG10 (V4L2_PIX_FMT_SRGGB10)"; + + return "UNKNOWN"; +} + + +inline void v4l2_print_format( const v4l2_format& fmt, const char* text ) +{ + printf("v4l2 -- %s\n", text); + printf("v4l2 -- width %u\n", fmt.fmt.pix.width); + printf("v4l2 -- height %u\n", fmt.fmt.pix.height); + printf("v4l2 -- pitch %u\n", fmt.fmt.pix.bytesperline); + printf("v4l2 -- size %u\n", fmt.fmt.pix.sizeimage); + printf("v4l2 -- format 0x%X %s\n", fmt.fmt.pix.pixelformat, v4l2_format_str(fmt.fmt.pix.pixelformat)); + printf("v4l2 -- color 0x%X\n", fmt.fmt.pix.colorspace); + printf("v4l2 -- field 0x%X\n", fmt.fmt.pix.field); +} + + +inline void v4l2_print_formatdesc( const v4l2_fmtdesc& desc ) +{ + printf("v4l2 -- format #%u\n", desc.index); + printf("v4l2 -- desc %s\n", desc.description); + printf("v4l2 -- flags %s\n", (desc.flags == 0 ? "V4L2_FMT_FLAG_UNCOMPRESSED" : "V4L2_FMT_FLAG_COMPRESSED")); + printf("v4l2 -- fourcc 0x%X %s\n", desc.pixelformat, v4l2_format_str(desc.pixelformat)); + +} + + +bool v4l2Camera::initFormats() +{ + struct v4l2_fmtdesc desc; + memset(&desc, 0, sizeof(v4l2_fmtdesc)); + + desc.index = 0; + desc.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + + while( ioctl(mFD, VIDIOC_ENUM_FMT, &desc) == 0 ) + { + mFormats.push_back(desc); + v4l2_print_formatdesc( desc ); + desc.index++; + } + + return true; +} + + +// initStream +bool v4l2Camera::initStream() +{ + struct v4l2_format fmt; + memset(&fmt, 0, sizeof(v4l2_format)); + fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + + // retrieve existing video format + if( xioctl(mFD, VIDIOC_G_FMT, &fmt) < 0 ) + { + const int err = errno; + printf( "v4l2 -- failed to get video format of device (errno=%i) (%s)\n", errno, strerror(errno)); + return false; + } + + v4l2_print_format(fmt, "preexisting format"); + +#if 1 + // setup new format + struct v4l2_format new_fmt; + memset(&new_fmt, 0, sizeof(v4l2_format)); + + new_fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + new_fmt.fmt.pix.width = fmt.fmt.pix.width; + new_fmt.fmt.pix.height = fmt.fmt.pix.height; + new_fmt.fmt.pix.pixelformat = fmt.fmt.pix.pixelformat; + new_fmt.fmt.pix.field = fmt.fmt.pix.field; + new_fmt.fmt.pix.colorspace = fmt.fmt.pix.colorspace; + + if( mRequestWidth > 0 && mRequestHeight > 0 ) + { + new_fmt.fmt.pix.width = mRequestWidth; + new_fmt.fmt.pix.height = mRequestHeight; + } + + if( mRequestFormat >= 0 && mRequestFormat < mFormats.size() ) + new_fmt.fmt.pix.pixelformat = mFormats[mRequestFormat].pixelformat; + + v4l2_print_format(new_fmt, "setting new format..."); + + if( xioctl(mFD, VIDIOC_S_FMT, &new_fmt) < 0 ) + { + const int err = errno; + printf( "v4l2 -- failed to set video format of device (errno=%i) (%s)\n", errno, strerror(errno)); + return false; + } + + + // re-retrieve the current format, with detailed info like line pitch/ect. + memset(&fmt, 0, sizeof(v4l2_format)); + fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + + if( xioctl(mFD, VIDIOC_G_FMT, &fmt) < 0 ) + { + const int err = errno; + printf( "v4l2 -- failed to get video format of device (errno=%i) (%s)\n", errno, strerror(errno)); + return false; + } + + v4l2_print_format(fmt, "confirmed new format"); +#endif + + mWidth = fmt.fmt.pix.width; + mHeight = fmt.fmt.pix.height; + mPitch = fmt.fmt.pix.bytesperline; + mPixelDepth = (mPitch * 8) / mWidth; + + // initMMap + if( !initMMap() ) // initUserPtr() + return false; + + return true; +} + + +// Create +v4l2Camera* v4l2Camera::Create( const char* device_path ) +{ + v4l2Camera* cam = new v4l2Camera(device_path); + + if( !cam->init() ) + { + printf("v4l2 -- failed to create instance %s\n", device_path); + delete cam; + return NULL; + } + + return cam; +} + + +// Init +bool v4l2Camera::init() +{ + // locate the /dev/event* path for this device + mFD = open(mDevicePath.c_str(), O_RDWR | O_NONBLOCK, 0 ); + + if( mFD < 0 ) + { + printf( "v4l2 -- failed to open %s\n", mDevicePath.c_str()); + return false; + } + + // initialize + if( !initCaps() ) + return false; + + if( !initFormats() ) + return false; + + if( !initStream() ) + return false; + + return true; +} + + +// Open +bool v4l2Camera::Open() +{ + printf( "v4l2Camera::Open(%s)\n", mDevicePath.c_str()); + + // begin streaming + enum v4l2_buf_type type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + + printf( "v4l2 -- starting streaming %s with ioctl(VIDIOC_STREAMON)...\n", mDevicePath.c_str()); + + if( xioctl(mFD, VIDIOC_STREAMON, &type) < 0 ) + { + printf( "v4l2 -- failed to start streaming (errno=%i) (%s)\n", errno, strerror(errno)); + return false; + } + + return true; +} + + +// Close +bool v4l2Camera::Close() +{ + // stop streaming + enum v4l2_buf_type type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + + printf( "v4l2 -- stopping streaming %s with ioctl(VIDIOC_STREAMOFF)...\n", mDevicePath.c_str()); + + if( xioctl(mFD, VIDIOC_STREAMOFF, &type) < 0 ) + { + printf( "v4l2 -- failed to stop streaming (errno=%i) (%s)\n", errno, strerror(errno)); + //return false; + } + + return true; +} + + + +// initCaps +bool v4l2Camera::initCaps() +{ + struct v4l2_capability caps; + + if( xioctl(mFD, VIDIOC_QUERYCAP, &caps) < 0 ) + { + printf( "v4l2 -- failed to query caps (xioctl VIDIOC_QUERYCAP) for %s\n", mDevicePath.c_str()); + return false; + } + + #define PRINT_CAP(x) printf( "v4l2 -- %-18s %s\n", #x, (caps.capabilities & x) ? "yes" : "no") + + PRINT_CAP(V4L2_CAP_VIDEO_CAPTURE); + PRINT_CAP(V4L2_CAP_READWRITE); + PRINT_CAP(V4L2_CAP_ASYNCIO); + PRINT_CAP(V4L2_CAP_STREAMING); + + if( !(caps.capabilities & V4L2_CAP_VIDEO_CAPTURE) ) + { + printf( "v4l2 -- %s is not a video capture device\n", mDevicePath.c_str()); + return false; + } + + return true; +} + + +// initUserPtr +bool v4l2Camera::initUserPtr() +{ + // request buffers + struct v4l2_requestbuffers req; + memset(&req, 0, sizeof(v4l2_requestbuffers)); + + req.count = REQUESTED_RINGBUFFERS; + req.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + req.memory = V4L2_MEMORY_USERPTR; + + if ( xioctl(mFD, VIDIOC_REQBUFS, &req) < 0 ) + { + const int err = errno; + printf( "v4l2 -- failed to request buffers (errno=%i) (%s)\n", errno, strerror(errno)); + return false; + } + + // queue ringbuffer +#if 0 + for( size_t n=0; n < mRingbuffer.size(); n++ ) + { + struct v4l2_buffer buf; + memset(&buf, 0, sizeof(v4l2_buffer)); + + buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + buf.memory = V4L2_MEMORY_USERPTR; + buf.index = n; + buf.length = mRingbuffer[n]->GetSize(); + + buf.m.userptr = (unsigned long)mRingbuffer[n]->GetCPU(); + + if( xioctl(mFD, VIDIOC_QBUF, &buf) < 0 ) + { + printf( "v4l2 -- failed to queue buffer %zu (errno=%i) (%s)\n", n, errno, strerror(errno)); + return false; + } + } +#endif + + return true; +} \ No newline at end of file diff --git a/camera/v4l2Camera.h b/camera/v4l2Camera.h new file mode 100644 index 000000000..11f555014 --- /dev/null +++ b/camera/v4l2Camera.h @@ -0,0 +1,107 @@ +/* + * inference-101 + */ + +#ifndef __V4L2_CAPTURE_H +#define __V4L2_CAPTURE_H + + +#include + +#include +#include +#include + + + +struct v4l2_mmap +{ + struct v4l2_buffer buf; + void* ptr; +}; + + +/** + * Video4Linux2 camera capture streaming. + */ +class v4l2Camera +{ +public: + /** + * Create V4L2 interface + * @param path Filename of the video device (e.g. /dev/video0) + */ + static v4l2Camera* Create( const char* device_path ); + + /** + * Destructor + */ + ~v4l2Camera(); + + /** + * Start streaming + */ + bool Open(); + + /** + * Stop streaming + */ + bool Close(); + + /** + * Return the next image. + */ + void* Capture( size_t timeout=0 ); + + /** + * Get width, in pixels, of camera image. + */ + inline uint32_t GetWidth() const { return mWidth; } + + /** + * Retrieve height, in pixels, of camera image. + */ + inline uint32_t GetHeight() const { return mHeight; } + + /** + * Return the size in bytes of one line of the image. + */ + inline uint32_t GetPitch() const { return mPitch; } + + /** + * Return the bit depth per pixel. + */ + inline uint32_t GetPixelDepth() const { return mPixelDepth; } + +private: + + v4l2Camera( const char* device_path ); + + bool init(); + bool initCaps(); + bool initFormats(); + bool initStream(); + + bool initUserPtr(); + bool initMMap(); + + int mFD; + int mRequestFormat; + uint32_t mRequestWidth; + uint32_t mRequestHeight; + uint32_t mWidth; + uint32_t mHeight; + uint32_t mPitch; + uint32_t mPixelDepth; + + v4l2_mmap* mBuffersMMap; + size_t mBufferCountMMap; + + std::vector mFormats; + std::string mDevicePath; +}; + + +#endif + + diff --git a/cuda/cudaMappedMemory.h b/cuda/cudaMappedMemory.h new file mode 100644 index 000000000..ce10d17a1 --- /dev/null +++ b/cuda/cudaMappedMemory.h @@ -0,0 +1,34 @@ +/* + * inference-101 + */ + +#ifndef __CUDA_MAPPED_MEMORY_H_ +#define __CUDA_MAPPED_MEMORY_H_ + + +#include "cudaUtility.h" + + +/** + * Allocate ZeroCopy mapped memory, shared between CUDA and CPU. + */ +inline bool cudaAllocMapped( void** cpuPtr, void** gpuPtr, size_t size ) +{ + if( !cpuPtr || !gpuPtr || size == 0 ) + return false; + + //CUDA(cudaSetDeviceFlags(cudaDeviceMapHost)); + + if( CUDA_FAILED(cudaHostAlloc(cpuPtr, size, cudaHostAllocMapped)) ) + return false; + + if( CUDA_FAILED(cudaHostGetDevicePointer(gpuPtr, *cpuPtr, 0)) ) + return false; + + memset(*cpuPtr, 0, size); + printf("[cuda] cudaAllocMapped %zu bytes, CPU %p GPU %p\n", size, *cpuPtr, *gpuPtr); + return true; +} + + +#endif diff --git a/cuda/cudaNormalize.cu b/cuda/cudaNormalize.cu new file mode 100644 index 000000000..90d4cb40f --- /dev/null +++ b/cuda/cudaNormalize.cu @@ -0,0 +1,53 @@ +/* + * inference-101 + */ + +#include "cudaNormalize.h" + + + +// gpuNormalize +template +__global__ void gpuNormalize( T* input, T* output, int width, int height, float scaling_factor ) +{ + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; + + if( x >= width || y >= height ) + return; + + const T px = input[ y * width + x ]; + + output[y*width+x] = make_float4(px.x * scaling_factor, + px.y * scaling_factor, + px.z * scaling_factor, + px.w * scaling_factor); +} + + +// cudaNormalizeRGBA +cudaError_t cudaNormalizeRGBA( float4* input, const float2& input_range, + float4* output, const float2& output_range, + size_t width, size_t height ) +{ + if( !input || !output ) + return cudaErrorInvalidDevicePointer; + + if( width == 0 || height == 0 ) + return cudaErrorInvalidValue; + + const float multiplier = output_range.y / input_range.y; + + // launch kernel + const dim3 blockDim(8, 8); + const dim3 gridDim(iDivUp(width,blockDim.x), iDivUp(height,blockDim.y)); + + gpuNormalize<<>>(input, output, width, height, multiplier); + + return CUDA(cudaGetLastError()); +} + + + + + diff --git a/cuda/cudaNormalize.h b/cuda/cudaNormalize.h new file mode 100644 index 000000000..3b162f444 --- /dev/null +++ b/cuda/cudaNormalize.h @@ -0,0 +1,21 @@ +/* + * inference-101 + */ + +#ifndef __CUDA_NORMALIZE_H__ +#define __CUDA_NORMALIZE_H__ + + +#include "cudaUtility.h" + + +/** + * Rebase the pixel intensities of an image between two scales. + * For example, convert an image with values 0.0-255 to 0.0-1.0. + */ +cudaError_t cudaNormalizeRGBA( float4* input, const float2& input_range, + float4* output, const float2& output_range, + size_t width, size_t height ); + +#endif + diff --git a/cuda/cudaResize.cu b/cuda/cudaResize.cu new file mode 100644 index 000000000..92316cbd4 --- /dev/null +++ b/cuda/cudaResize.cu @@ -0,0 +1,76 @@ +/* + * inference-101 + */ + +#include "cudaResize.h" + + + +// gpuResample +template +__global__ void gpuResize( float2 scale, T* input, int iWidth, T* output, int oWidth, int oHeight ) +{ + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; + + if( x >= oWidth || y >= oHeight ) + return; + + const int dx = ((float)x * scale.x); + const int dy = ((float)y * scale.y); + + const T px = input[ dy * iWidth + dx ]; + + output[y*oWidth+x] = px; +} + + +// cudaResize +cudaError_t cudaResize( float* input, size_t inputWidth, size_t inputHeight, + float* output, size_t outputWidth, size_t outputHeight ) +{ + if( !input || !output ) + return cudaErrorInvalidDevicePointer; + + if( inputWidth == 0 || outputWidth == 0 || inputHeight == 0 || outputHeight == 0 ) + return cudaErrorInvalidValue; + + const float2 scale = make_float2( float(inputWidth) / float(outputWidth), + float(inputHeight) / float(outputHeight) ); + + // launch kernel + const dim3 blockDim(8, 8); + const dim3 gridDim(iDivUp(outputWidth,blockDim.x), iDivUp(outputHeight,blockDim.y)); + + gpuResize<<>>(scale, input, inputWidth, output, outputWidth, outputHeight); + + return CUDA(cudaGetLastError()); +} + + +// cudaResizeRGBA +cudaError_t cudaResizeRGBA( float4* input, size_t inputWidth, size_t inputHeight, + float4* output, size_t outputWidth, size_t outputHeight ) +{ + if( !input || !output ) + return cudaErrorInvalidDevicePointer; + + if( inputWidth == 0 || outputWidth == 0 || inputHeight == 0 || outputHeight == 0 ) + return cudaErrorInvalidValue; + + const float2 scale = make_float2( float(inputWidth) / float(outputWidth), + float(inputHeight) / float(outputHeight) ); + + // launch kernel + const dim3 blockDim(8, 8); + const dim3 gridDim(iDivUp(outputWidth,blockDim.x), iDivUp(outputHeight,blockDim.y)); + + gpuResize<<>>(scale, input, inputWidth, output, outputWidth, outputHeight); + + return CUDA(cudaGetLastError()); +} + + + + + diff --git a/cuda/cudaResize.h b/cuda/cudaResize.h new file mode 100644 index 000000000..ada08d689 --- /dev/null +++ b/cuda/cudaResize.h @@ -0,0 +1,29 @@ +/* + * inference-101 + */ + +#ifndef __CUDA_RESIZE_H__ +#define __CUDA_RESIZE_H__ + + +#include "cudaUtility.h" + + +/** + * Function for increasing or decreasing the size of an image on the GPU. + */ +cudaError_t cudaResize( float* input, size_t inputWidth, size_t inputHeight, + float* output, size_t outputWidth, size_t outputHeight ); + + +/** + * Function for increasing or decreasing the size of an image on the GPU. + */ +cudaError_t cudaResizeRGBA( float4* input, size_t inputWidth, size_t inputHeight, + float4* output, size_t outputWidth, size_t outputHeight ); + + + + +#endif + diff --git a/cuda/cudaUtility.h b/cuda/cudaUtility.h new file mode 100644 index 000000000..ddf3b417f --- /dev/null +++ b/cuda/cudaUtility.h @@ -0,0 +1,83 @@ +/* + * inference-101 + */ + +#ifndef __CUDA_UTILITY_H_ +#define __CUDA_UTILITY_H_ + + +#include +#include +#include +#include + + +/** + * Execute a CUDA call and print out any errors + * @return the original cudaError_t result + */ +#define CUDA(x) cudaCheckError((x), #x, __FILE__, __LINE__) + +/** + * Evaluates to true on success + */ +#define CUDA_SUCCESS(x) (CUDA(x) == cudaSuccess) + +/** + * Evaluates to true on failure + */ +#define CUDA_FAILED(x) (CUDA(x) != cudaSuccess) + +/** + * Return from the boolean function if CUDA call fails + */ +#define CUDA_VERIFY(x) if(CUDA_FAILED(x)) return false; + +/** + * LOG_CUDA string. + */ +#define LOG_CUDA "[cuda] " + +/* + * define this if you want all cuda calls to be printed + */ +//#define CUDA_TRACE + + + +/** + * cudaCheckError + */ +inline cudaError_t cudaCheckError(cudaError_t retval, const char* txt, const char* file, int line ) +{ +#if !defined(CUDA_TRACE) + if( retval == cudaSuccess) + return cudaSuccess; +#endif + + //int activeDevice = -1; + //cudaGetDevice(&activeDevice); + + //Log("[cuda] device %i - %s\n", activeDevice, txt); + + printf(LOG_CUDA "%s\n", txt); + + + if( retval != cudaSuccess ) + { + printf(LOG_CUDA " %s (error %u) (hex 0x%02X)\n", cudaGetErrorString(retval), retval, retval); + printf(LOG_CUDA " %s:%i\n", file, line); + } + + return retval; +} + + +/** + * iDivUp + */ +inline __device__ __host__ int iDivUp( int a, int b ) { return (a % b != 0) ? (a / b + 1) : (a / b); } + + + +#endif diff --git a/cuda/cudaYUV-NV12.cu b/cuda/cudaYUV-NV12.cu new file mode 100644 index 000000000..58c4c49e9 --- /dev/null +++ b/cuda/cudaYUV-NV12.cu @@ -0,0 +1,424 @@ +/* + * inference-101 + */ + +#include "cudaYUV.h" + + +#define COLOR_COMPONENT_MASK 0x3FF +#define COLOR_COMPONENT_BIT_SIZE 10 + +#define FIXED_DECIMAL_POINT 24 +#define FIXED_POINT_MULTIPLIER 1.0f +#define FIXED_COLOR_COMPONENT_MASK 0xffffffff + +#define MUL(x,y) (x*y) + + + +__constant__ uint32_t constAlpha; +__constant__ float constHueColorSpaceMat[9]; + + + +__device__ void YUV2RGB(uint32_t *yuvi, float *red, float *green, float *blue) +{ + + + // Prepare for hue adjustment + /* + float luma, chromaCb, chromaCr; + + luma = (float)yuvi[0]; + chromaCb = (float)((int)yuvi[1] - 512.0f); + chromaCr = (float)((int)yuvi[2] - 512.0f); + + // Convert YUV To RGB with hue adjustment + *red = MUL(luma, constHueColorSpaceMat[0]) + + MUL(chromaCb, constHueColorSpaceMat[1]) + + MUL(chromaCr, constHueColorSpaceMat[2]); + *green= MUL(luma, constHueColorSpaceMat[3]) + + MUL(chromaCb, constHueColorSpaceMat[4]) + + MUL(chromaCr, constHueColorSpaceMat[5]); + *blue = MUL(luma, constHueColorSpaceMat[6]) + + MUL(chromaCb, constHueColorSpaceMat[7]) + + MUL(chromaCr, constHueColorSpaceMat[8]);*/ + + const float luma = float(yuvi[0]); + const float u = float(yuvi[1]) - 512.0f; + const float v = float(yuvi[2]) - 512.0f; + + /*R = Y + 1.140V + G = Y - 0.395U - 0.581V + B = Y + 2.032U*/ + + /**green = luma + 1.140f * v; + *blue = luma - 0.395f * u - 0.581f * v; + *red = luma + 2.032f * u;*/ + + *red = luma + 1.140f * v; + *green = luma - 0.395f * u - 0.581f * v; + *blue = luma + 2.032f * u; +} + + +__device__ uint32_t RGBAPACK_8bit(float red, float green, float blue, uint32_t alpha) +{ + uint32_t ARGBpixel = 0; + + // Clamp final 10 bit results + red = min(max(red, 0.0f), 255.0f); + green = min(max(green, 0.0f), 255.0f); + blue = min(max(blue, 0.0f), 255.0f); + + // Convert to 8 bit unsigned integers per color component + ARGBpixel = ((((uint32_t)red) << 24) | + (((uint32_t)green) << 16) | + (((uint32_t)blue) << 8) | (uint32_t)alpha); + + return ARGBpixel; +} + + +__device__ uint32_t RGBAPACK_10bit(float red, float green, float blue, uint32_t alpha) +{ + uint32_t ARGBpixel = 0; + + // Clamp final 10 bit results + red = min(max(red, 0.0f), 1023.f); + green = min(max(green, 0.0f), 1023.f); + blue = min(max(blue, 0.0f), 1023.f); + + // Convert to 8 bit unsigned integers per color component + ARGBpixel = ((((uint32_t)red >> 2) << 24) | + (((uint32_t)green >> 2) << 16) | + (((uint32_t)blue >> 2) << 8) | (uint32_t)alpha); + + return ARGBpixel; +} + + +// CUDA kernel for outputing the final ARGB output from NV12; +/*extern "C"*/ +__global__ void Passthru(uint32_t *srcImage, size_t nSourcePitch, + uint32_t *dstImage, size_t nDestPitch, + uint32_t width, uint32_t height) +{ + int x, y; + uint32_t yuv101010Pel[2]; + uint32_t processingPitch = ((width) + 63) & ~63; + uint32_t dstImagePitch = nDestPitch >> 2; + uint8_t *srcImageU8 = (uint8_t *)srcImage; + + processingPitch = nSourcePitch; + + // Pad borders with duplicate pixels, and we multiply by 2 because we process 2 pixels per thread + x = blockIdx.x * (blockDim.x << 1) + (threadIdx.x << 1); + y = blockIdx.y * blockDim.y + threadIdx.y; + + if (x >= width) + return; //x = width - 1; + + if (y >= height) + return; // y = height - 1; + + // Read 2 Luma components at a time, so we don't waste processing since CbCr are decimated this way. + // if we move to texture we could read 4 luminance values + yuv101010Pel[0] = (srcImageU8[y * processingPitch + x ]); + yuv101010Pel[1] = (srcImageU8[y * processingPitch + x + 1]); + + // this steps performs the color conversion + float luma[2]; + + luma[0] = (yuv101010Pel[0] & 0x00FF); + luma[1] = (yuv101010Pel[1] & 0x00FF); + + // Clamp the results to RGBA + dstImage[y * dstImagePitch + x ] = RGBAPACK_8bit(luma[0], luma[0], luma[0], constAlpha); + dstImage[y * dstImagePitch + x + 1 ] = RGBAPACK_8bit(luma[1], luma[1], luma[1], constAlpha); +} + + +// CUDA kernel for outputing the final ARGB output from NV12; +/*extern "C"*/ +__global__ void NV12ToARGB(uint32_t *srcImage, size_t nSourcePitch, + uint32_t *dstImage, size_t nDestPitch, + uint32_t width, uint32_t height) +{ + int x, y; + uint32_t yuv101010Pel[2]; + uint32_t processingPitch = ((width) + 63) & ~63; + uint32_t dstImagePitch = nDestPitch >> 2; + uint8_t *srcImageU8 = (uint8_t *)srcImage; + + processingPitch = nSourcePitch; + + // Pad borders with duplicate pixels, and we multiply by 2 because we process 2 pixels per thread + x = blockIdx.x * (blockDim.x << 1) + (threadIdx.x << 1); + y = blockIdx.y * blockDim.y + threadIdx.y; + + if (x >= width) + return; //x = width - 1; + + if (y >= height) + return; // y = height - 1; + + // Read 2 Luma components at a time, so we don't waste processing since CbCr are decimated this way. + // if we move to texture we could read 4 luminance values + yuv101010Pel[0] = (srcImageU8[y * processingPitch + x ]) << 2; + yuv101010Pel[1] = (srcImageU8[y * processingPitch + x + 1]) << 2; + + uint32_t chromaOffset = processingPitch * height; + int y_chroma = y >> 1; + + if (y & 1) // odd scanline ? + { + uint32_t chromaCb; + uint32_t chromaCr; + + chromaCb = srcImageU8[chromaOffset + y_chroma * processingPitch + x ]; + chromaCr = srcImageU8[chromaOffset + y_chroma * processingPitch + x + 1]; + + if (y_chroma < ((height >> 1) - 1)) // interpolate chroma vertically + { + chromaCb = (chromaCb + srcImageU8[chromaOffset + (y_chroma + 1) * processingPitch + x ] + 1) >> 1; + chromaCr = (chromaCr + srcImageU8[chromaOffset + (y_chroma + 1) * processingPitch + x + 1] + 1) >> 1; + } + + yuv101010Pel[0] |= (chromaCb << (COLOR_COMPONENT_BIT_SIZE + 2)); + yuv101010Pel[0] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); + + yuv101010Pel[1] |= (chromaCb << (COLOR_COMPONENT_BIT_SIZE + 2)); + yuv101010Pel[1] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); + } + else + { + yuv101010Pel[0] |= ((uint32_t)srcImageU8[chromaOffset + y_chroma * processingPitch + x ] << (COLOR_COMPONENT_BIT_SIZE + 2)); + yuv101010Pel[0] |= ((uint32_t)srcImageU8[chromaOffset + y_chroma * processingPitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); + + yuv101010Pel[1] |= ((uint32_t)srcImageU8[chromaOffset + y_chroma * processingPitch + x ] << (COLOR_COMPONENT_BIT_SIZE + 2)); + yuv101010Pel[1] |= ((uint32_t)srcImageU8[chromaOffset + y_chroma * processingPitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); + } + + // this steps performs the color conversion + uint32_t yuvi[6]; + float red[2], green[2], blue[2]; + + yuvi[0] = (yuv101010Pel[0] & COLOR_COMPONENT_MASK); + yuvi[1] = ((yuv101010Pel[0] >> COLOR_COMPONENT_BIT_SIZE) & COLOR_COMPONENT_MASK); + yuvi[2] = ((yuv101010Pel[0] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK); + + yuvi[3] = (yuv101010Pel[1] & COLOR_COMPONENT_MASK); + yuvi[4] = ((yuv101010Pel[1] >> COLOR_COMPONENT_BIT_SIZE) & COLOR_COMPONENT_MASK); + yuvi[5] = ((yuv101010Pel[1] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK); + + // YUV to RGB Transformation conversion + YUV2RGB(&yuvi[0], &red[0], &green[0], &blue[0]); + YUV2RGB(&yuvi[3], &red[1], &green[1], &blue[1]); + + // Clamp the results to RGBA + dstImage[y * dstImagePitch + x ] = RGBAPACK_10bit(red[0], green[0], blue[0], constAlpha); + dstImage[y * dstImagePitch + x + 1 ] = RGBAPACK_10bit(red[1], green[1], blue[1], constAlpha); +} + + +bool nv12ColorspaceSetup = false; + + +// cudaNV12ToARGB32 +cudaError_t cudaNV12ToRGBA( uint8_t* srcDev, size_t srcPitch, uchar4* destDev, size_t destPitch, size_t width, size_t height ) +{ + if( !srcDev || !destDev ) + return cudaErrorInvalidDevicePointer; + + if( srcPitch == 0 || destPitch == 0 || width == 0 || height == 0 ) + return cudaErrorInvalidValue; + + if( !nv12ColorspaceSetup ) + cudaNV12SetupColorspace(); + + const dim3 blockDim(32,16,1); + const dim3 gridDim((width+(2*blockDim.x-1))/(2*blockDim.x), (height+(blockDim.y-1))/blockDim.y, 1); + + NV12ToARGB<<>>( (uint32_t*)srcDev, srcPitch, (uint32_t*)destDev, destPitch, width, height ); + + return CUDA(cudaGetLastError()); +} + +cudaError_t cudaNV12ToRGBA( uint8_t* srcDev, uchar4* destDev, size_t width, size_t height ) +{ + return cudaNV12ToRGBA(srcDev, width * sizeof(uint8_t), destDev, width * sizeof(uchar4), width, height); +} + + +//------------------------------------------------------------------------------------------------------------------------- + +__global__ void NV12ToRGBAf(uint32_t* srcImage, size_t nSourcePitch, + float4* dstImage, size_t nDestPitch, + uint32_t width, uint32_t height) +{ + int x, y; + uint32_t yuv101010Pel[2]; + uint32_t processingPitch = ((width) + 63) & ~63; + uint8_t *srcImageU8 = (uint8_t *)srcImage; + + processingPitch = nSourcePitch; + + // Pad borders with duplicate pixels, and we multiply by 2 because we process 2 pixels per thread + x = blockIdx.x * (blockDim.x << 1) + (threadIdx.x << 1); + y = blockIdx.y * blockDim.y + threadIdx.y; + + if (x >= width) + return; //x = width - 1; + + if (y >= height) + return; // y = height - 1; + +#if 1 + // Read 2 Luma components at a time, so we don't waste processing since CbCr are decimated this way. + // if we move to texture we could read 4 luminance values + yuv101010Pel[0] = (srcImageU8[y * processingPitch + x ]) << 2; + yuv101010Pel[1] = (srcImageU8[y * processingPitch + x + 1]) << 2; + + uint32_t chromaOffset = processingPitch * height; + int y_chroma = y >> 1; + + if (y & 1) // odd scanline ? + { + uint32_t chromaCb; + uint32_t chromaCr; + + chromaCb = srcImageU8[chromaOffset + y_chroma * processingPitch + x ]; + chromaCr = srcImageU8[chromaOffset + y_chroma * processingPitch + x + 1]; + + if (y_chroma < ((height >> 1) - 1)) // interpolate chroma vertically + { + chromaCb = (chromaCb + srcImageU8[chromaOffset + (y_chroma + 1) * processingPitch + x ] + 1) >> 1; + chromaCr = (chromaCr + srcImageU8[chromaOffset + (y_chroma + 1) * processingPitch + x + 1] + 1) >> 1; + } + + yuv101010Pel[0] |= (chromaCb << (COLOR_COMPONENT_BIT_SIZE + 2)); + yuv101010Pel[0] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); + + yuv101010Pel[1] |= (chromaCb << (COLOR_COMPONENT_BIT_SIZE + 2)); + yuv101010Pel[1] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); + } + else + { + yuv101010Pel[0] |= ((uint32_t)srcImageU8[chromaOffset + y_chroma * processingPitch + x ] << (COLOR_COMPONENT_BIT_SIZE + 2)); + yuv101010Pel[0] |= ((uint32_t)srcImageU8[chromaOffset + y_chroma * processingPitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); + + yuv101010Pel[1] |= ((uint32_t)srcImageU8[chromaOffset + y_chroma * processingPitch + x ] << (COLOR_COMPONENT_BIT_SIZE + 2)); + yuv101010Pel[1] |= ((uint32_t)srcImageU8[chromaOffset + y_chroma * processingPitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); + } + + // this steps performs the color conversion + uint32_t yuvi[6]; + float red[2], green[2], blue[2]; + + yuvi[0] = (yuv101010Pel[0] & COLOR_COMPONENT_MASK); + yuvi[1] = ((yuv101010Pel[0] >> COLOR_COMPONENT_BIT_SIZE) & COLOR_COMPONENT_MASK); + yuvi[2] = ((yuv101010Pel[0] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK); + + yuvi[3] = (yuv101010Pel[1] & COLOR_COMPONENT_MASK); + yuvi[4] = ((yuv101010Pel[1] >> COLOR_COMPONENT_BIT_SIZE) & COLOR_COMPONENT_MASK); + yuvi[5] = ((yuv101010Pel[1] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK); + + // YUV to RGB Transformation conversion + YUV2RGB(&yuvi[0], &red[0], &green[0], &blue[0]); + YUV2RGB(&yuvi[3], &red[1], &green[1], &blue[1]); + + // Clamp the results to RGBA + //printf("cuda thread %i %i %f %f %f\n", x, y, red[0], green[0], blue[0]); + + const float s = 1.0f / 1024.0f * 255.0f; + + dstImage[y * width + x] = make_float4(red[0] * s, green[0] * s, blue[0] * s, 1.0f); + dstImage[y * width + x + 1] = make_float4(red[1] * s, green[1] * s, blue[1] * s, 1.0f); +#else + //printf("cuda thread %i %i %i %i \n", x, y, width, height); + + dstImage[y * width + x] = make_float4(1.0f, 0.0f, 0.0f, 1.0f); + dstImage[y * width + x + 1] = make_float4(1.0f, 0.0f, 0.0f, 1.0f); +#endif +} + + + +// cudaNV12ToRGBA +cudaError_t cudaNV12ToRGBAf( uint8_t* srcDev, size_t srcPitch, float4* destDev, size_t destPitch, size_t width, size_t height ) +{ + if( !srcDev || !destDev ) + return cudaErrorInvalidDevicePointer; + + if( srcPitch == 0 || destPitch == 0 || width == 0 || height == 0 ) + return cudaErrorInvalidValue; + + if( !nv12ColorspaceSetup ) + cudaNV12SetupColorspace(); + + const dim3 blockDim(8,8,1); + //const dim3 gridDim((width+(2*blockDim.x-1))/(2*blockDim.x), (height+(blockDim.y-1))/blockDim.y, 1); + const dim3 gridDim(iDivUp(width,blockDim.x), iDivUp(height, blockDim.y), 1); + + NV12ToRGBAf<<>>( (uint32_t*)srcDev, srcPitch, destDev, destPitch, width, height ); + + return CUDA(cudaGetLastError()); +} + +cudaError_t cudaNV12ToRGBAf( uint8_t* srcDev, float4* destDev, size_t width, size_t height ) +{ + return cudaNV12ToRGBAf(srcDev, width * sizeof(uint8_t), destDev, width * sizeof(float4), width, height); +} + + +// cudaNV12SetupColorspace +cudaError_t cudaNV12SetupColorspace( float hue ) +{ + const float hueSin = sin(hue); + const float hueCos = cos(hue); + + float hueCSC[9]; + + const bool itu601 = false; + + if( itu601 /*CSC == ITU601*/) + { + //CCIR 601 + hueCSC[0] = 1.1644f; + hueCSC[1] = hueSin * 1.5960f; + hueCSC[2] = hueCos * 1.5960f; + hueCSC[3] = 1.1644f; + hueCSC[4] = (hueCos * -0.3918f) - (hueSin * 0.8130f); + hueCSC[5] = (hueSin * 0.3918f) - (hueCos * 0.8130f); + hueCSC[6] = 1.1644f; + hueCSC[7] = hueCos * 2.0172f; + hueCSC[8] = hueSin * -2.0172f; + } + else /*if(CSC == ITU709)*/ + { + //CCIR 709 + hueCSC[0] = 1.0f; + hueCSC[1] = hueSin * 1.57480f; + hueCSC[2] = hueCos * 1.57480f; + hueCSC[3] = 1.0; + hueCSC[4] = (hueCos * -0.18732f) - (hueSin * 0.46812f); + hueCSC[5] = (hueSin * 0.18732f) - (hueCos * 0.46812f); + hueCSC[6] = 1.0f; + hueCSC[7] = hueCos * 1.85560f; + hueCSC[8] = hueSin * -1.85560f; + } + + + if( CUDA_FAILED(cudaMemcpyToSymbol(constHueColorSpaceMat, hueCSC, sizeof(float) * 9)) ) + return cudaErrorInvalidSymbol; + + uint32_t cudaAlpha = ((uint32_t)0xff<< 24); + + if( CUDA_FAILED(cudaMemcpyToSymbol(constAlpha, &cudaAlpha, sizeof(uint32_t))) ) + return cudaErrorInvalidSymbol; + + nv12ColorspaceSetup = true; + return cudaSuccess; +} + diff --git a/cuda/cudaYUV-YUYV.cu b/cuda/cudaYUV-YUYV.cu new file mode 100644 index 000000000..18cfb6afc --- /dev/null +++ b/cuda/cudaYUV-YUYV.cu @@ -0,0 +1,184 @@ +/* + * inference-101 + */ + +#include "cudaYUV.h" + + + +inline __device__ __host__ float clamp(float f, float a, float b) +{ + return fmaxf(a, fminf(f, b)); +} + + +/* From RGB to YUV + + Y = 0.299R + 0.587G + 0.114B + U = 0.492 (B-Y) + V = 0.877 (R-Y) + + It can also be represented as: + + Y = 0.299R + 0.587G + 0.114B + U = -0.147R - 0.289G + 0.436B + V = 0.615R - 0.515G - 0.100B + + From YUV to RGB + + R = Y + 1.140V + G = Y - 0.395U - 0.581V + B = Y + 2.032U + */ + +struct __align__(8) uchar8 +{ + uint8_t a0, a1, a2, a3, a4, a5, a6, a7; +}; +static __host__ __device__ __forceinline__ uchar8 make_uchar8(uint8_t a0, uint8_t a1, uint8_t a2, uint8_t a3, uint8_t a4, uint8_t a5, uint8_t a6, uint8_t a7) +{ + uchar8 val = {a0, a1, a2, a3, a4, a5, a6, a7}; + return val; +} + + +//----------------------------------------------------------------------------------- +// YUYV/UYVY to RGBA +//----------------------------------------------------------------------------------- +template +__global__ void yuyvToRgba( uchar4* src, int srcAlignedWidth, uchar8* dst, int dstAlignedWidth, int width, int height ) +{ + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; + + if( x >= srcAlignedWidth || y >= height ) + return; + + const uchar4 macroPx = src[y * srcAlignedWidth + x]; + + // Y0 is the brightness of pixel 0, Y1 the brightness of pixel 1. + // U0 and V0 is the color of both pixels. + // UYVY [ U0 | Y0 | V0 | Y1 ] + // YUYV [ Y0 | U0 | Y1 | V0 ] + const float y0 = formatUYVY ? macroPx.y : macroPx.x; + const float y1 = formatUYVY ? macroPx.w : macroPx.z; + const float u = (formatUYVY ? macroPx.x : macroPx.y) - 128.0f; + const float v = (formatUYVY ? macroPx.z : macroPx.w) - 128.0f; + + const float4 px0 = make_float4( y0 + 1.4065f * v, + y0 - 0.3455f * u - 0.7169f * v, + y0 + 1.7790f * u, 255.0f ); + + const float4 px1 = make_float4( y1 + 1.4065f * v, + y1 - 0.3455f * u - 0.7169f * v, + y1 + 1.7790f * u, 255.0f ); + + dst[y * dstAlignedWidth + x] = make_uchar8( clamp(px0.x, 0.0f, 255.0f), + clamp(px0.y, 0.0f, 255.0f), + clamp(px0.z, 0.0f, 255.0f), + clamp(px0.w, 0.0f, 255.0f), + clamp(px1.x, 0.0f, 255.0f), + clamp(px1.y, 0.0f, 255.0f), + clamp(px1.z, 0.0f, 255.0f), + clamp(px1.w, 0.0f, 255.0f) ); +} + +template +cudaError_t launchYUYV( uchar2* input, size_t inputPitch, uchar4* output, size_t outputPitch, size_t width, size_t height) +{ + if( !input || !inputPitch || !output || !outputPitch || !width || !height ) + return cudaErrorInvalidValue; + + const dim3 block(8,8); + const dim3 grid(iDivUp(width/2, block.x), iDivUp(height, block.y)); + + const int srcAlignedWidth = inputPitch / sizeof(uchar4); // normally would be uchar2, but we're doubling up pixels + const int dstAlignedWidth = outputPitch / sizeof(uchar8); // normally would be uchar4 ^^^ + + //printf("yuyvToRgba %zu %zu %i %i %i %i %i\n", width, height, (int)formatUYVY, srcAlignedWidth, dstAlignedWidth, grid.x, grid.y); + + yuyvToRgba<<>>((uchar4*)input, srcAlignedWidth, (uchar8*)output, dstAlignedWidth, width, height); + + return CUDA(cudaGetLastError()); +} + + +cudaError_t cudaUYVYToRGBA( uchar2* input, uchar4* output, size_t width, size_t height ) +{ + return cudaUYVYToRGBA(input, width * sizeof(uchar2), output, width * sizeof(uchar4), width, height); +} + +cudaError_t cudaUYVYToRGBA( uchar2* input, size_t inputPitch, uchar4* output, size_t outputPitch, size_t width, size_t height ) +{ + return launchYUYV(input, inputPitch, output, outputPitch, width, height); +} + +cudaError_t cudaYUYVToRGBA( uchar2* input, uchar4* output, size_t width, size_t height ) +{ + return cudaYUYVToRGBA(input, width * sizeof(uchar2), output, width * sizeof(uchar4), width, height); +} + +cudaError_t cudaYUYVToRGBA( uchar2* input, size_t inputPitch, uchar4* output, size_t outputPitch, size_t width, size_t height ) +{ + return launchYUYV(input, inputPitch, output, outputPitch, width, height); +} + + +//----------------------------------------------------------------------------------- +// YUYV/UYVY to grayscale +//----------------------------------------------------------------------------------- + +template +__global__ void yuyvToGray( uchar4* src, int srcAlignedWidth, float2* dst, int dstAlignedWidth, int width, int height ) +{ + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; + + if( x >= srcAlignedWidth || y >= height ) + return; + + const uchar4 macroPx = src[y * srcAlignedWidth + x]; + + const float y0 = formatUYVY ? macroPx.y : macroPx.x; + const float y1 = formatUYVY ? macroPx.w : macroPx.z; + + dst[y * dstAlignedWidth + x] = make_float2(y0/255.0f, y1/255.0f); +} + +template +cudaError_t launchGrayYUYV( uchar2* input, size_t inputPitch, float* output, size_t outputPitch, size_t width, size_t height) +{ + if( !input || !inputPitch || !output || !outputPitch || !width || !height ) + return cudaErrorInvalidValue; + + const dim3 block(8,8); + const dim3 grid(iDivUp(width/2, block.x), iDivUp(height, block.y)); + + const int srcAlignedWidth = inputPitch / sizeof(uchar4); // normally would be uchar2, but we're doubling up pixels + const int dstAlignedWidth = outputPitch / sizeof(float2); // normally would be float ^^^ + + yuyvToGray<<>>((uchar4*)input, srcAlignedWidth, (float2*)output, dstAlignedWidth, width, height); + + return CUDA(cudaGetLastError()); +} + +cudaError_t cudaUYVYToGray( uchar2* input, float* output, size_t width, size_t height ) +{ + return cudaUYVYToGray(input, width * sizeof(uchar2), output, width * sizeof(uint8_t), width, height); +} + +cudaError_t cudaUYVYToGray( uchar2* input, size_t inputPitch, float* output, size_t outputPitch, size_t width, size_t height ) +{ + return launchGrayYUYV(input, inputPitch, output, outputPitch, width, height); +} + +cudaError_t cudaYUYVToGray( uchar2* input, float* output, size_t width, size_t height ) +{ + return cudaYUYVToGray(input, width * sizeof(uchar2), output, width * sizeof(float), width, height); +} + +cudaError_t cudaYUYVToGray( uchar2* input, size_t inputPitch, float* output, size_t outputPitch, size_t width, size_t height ) +{ + return launchGrayYUYV(input, inputPitch, output, outputPitch, width, height); +} + diff --git a/cuda/cudaYUV-YV12.cu b/cuda/cudaYUV-YV12.cu new file mode 100644 index 000000000..280325d7b --- /dev/null +++ b/cuda/cudaYUV-YV12.cu @@ -0,0 +1,159 @@ +/* + * inference-101 + */ + +#include "cudaYUV.h" + + + + + +inline __device__ void rgb_to_y(const uint8_t r, const uint8_t g, const uint8_t b, uint8_t& y) +{ + y = static_cast(((int)(30 * r) + (int)(59 * g) + (int)(11 * b)) / 100); +} + +inline __device__ void rgb_to_yuv(const uint8_t r, const uint8_t g, const uint8_t b, uint8_t& y, uint8_t& u, uint8_t& v) +{ + rgb_to_y(r, g, b, y); + u = static_cast(((int)(-17 * r) - (int)(33 * g) + (int)(50 * b) + 12800) / 100); + v = static_cast(((int)(50 * r) - (int)(42 * g) - (int)(8 * b) + 12800) / 100); +} + +template +__global__ void RGB_to_YV12( T* src, int srcAlignedWidth, uint8_t* dst, int dstPitch, int width, int height ) +{ + const int x = (blockIdx.x * blockDim.x + threadIdx.x) * 2; + const int y = (blockIdx.y * blockDim.y + threadIdx.y) * 2; + + const int x1 = x + 1; + const int y1 = y + 1; + + if( x1 >= width || y1 >= height ) + return; + + const int planeSize = height * dstPitch; + + uint8_t* y_plane = dst; + uint8_t* u_plane; + uint8_t* v_plane; + + if( formatYV12 ) + { + u_plane = y_plane + planeSize; + v_plane = u_plane + (planeSize / 4); // size of U & V planes is 25% of Y plane + } + else + { + v_plane = y_plane + planeSize; // in I420, order of U & V planes is reversed + u_plane = v_plane + (planeSize / 4); + } + + T px; + uint8_t y_val, u_val, v_val; + + px = src[y * srcAlignedWidth + x]; + rgb_to_y(px.x, px.y, px.z, y_val); + y_plane[y * dstPitch + x] = y_val; + + px = src[y * srcAlignedWidth + x1]; + rgb_to_y(px.x, px.y, px.z, y_val); + y_plane[y * dstPitch + x1] = y_val; + + px = src[y1 * srcAlignedWidth + x]; + rgb_to_y(px.x, px.y, px.z, y_val); + y_plane[y1 * dstPitch + x] = y_val; + + px = src[y1 * srcAlignedWidth + x1]; + rgb_to_yuv(px.x, px.y, px.z, y_val, u_val, v_val); + y_plane[y1 * dstPitch + x1] = y_val; + + const int uvPitch = dstPitch / 2; + const int uvIndex = (y / 2) * uvPitch + (x / 2); + + u_plane[uvIndex] = u_val; + v_plane[uvIndex] = v_val; +} + +template +cudaError_t launch420( T* input, size_t inputPitch, uint8_t* output, size_t outputPitch, size_t width, size_t height) +{ + if( !input || !inputPitch || !output || !outputPitch || !width || !height ) + return cudaErrorInvalidValue; + + const dim3 block(32, 8); + const dim3 grid(iDivUp(width, block.x * 2), iDivUp(height, block.y * 2)); + + const int inputAlignedWidth = inputPitch / sizeof(T); + + RGB_to_YV12<<>>(input, inputAlignedWidth, output, outputPitch, width, height); + + return CUDA(cudaGetLastError()); +} + + + +// cudaRGBAToYV12 +cudaError_t cudaRGBAToYV12( uchar4* input, size_t inputPitch, uint8_t* output, size_t outputPitch, size_t width, size_t height ) +{ + return launch420( input, inputPitch, output, outputPitch, width, height ); +} + +// cudaRGBAToYV12 +cudaError_t cudaRGBAToYV12( uchar4* input, uint8_t* output, size_t width, size_t height ) +{ + return cudaRGBAToYV12( input, width * sizeof(uchar4), output, width * sizeof(uint8_t), width, height ); +} + +// cudaRGBAToI420 +cudaError_t cudaRGBAToI420( uchar4* input, size_t inputPitch, uint8_t* output, size_t outputPitch, size_t width, size_t height ) +{ + return launch420( input, inputPitch, output, outputPitch, width, height ); +} + +// cudaRGBAToI420 +cudaError_t cudaRGBAToI420( uchar4* input, uint8_t* output, size_t width, size_t height ) +{ + return cudaRGBAToI420( input, width * sizeof(uchar4), output, width * sizeof(uint8_t), width, height ); +} + + + +#if 0 +__global__ void Gray_to_YV12(const GlobPtrSz src, GlobPtr dst) +{ + const int x = (blockIdx.x * blockDim.x + threadIdx.x) * 2; + const int y = (blockIdx.y * blockDim.y + threadIdx.y) * 2; + + if (x + 1 >= src.cols || y + 1 >= src.rows) + return; + + // get pointers to the data + const size_t planeSize = src.rows * dst.step; + GlobPtr y_plane = globPtr(dst.data, dst.step); + GlobPtr u_plane = globPtr(y_plane.data + planeSize, dst.step / 2); + GlobPtr v_plane = globPtr(u_plane.data + (planeSize / 4), dst.step / 2); + + uint8_t pix; + uint8_t y_val, u_val, v_val; + + pix = src(y, x); + rgb_to_y(pix, pix, pix, y_val); + y_plane(y, x) = y_val; + + pix = src(y, x + 1); + rgb_to_y(pix, pix, pix, y_val); + y_plane(y, x + 1) = y_val; + + pix = src(y + 1, x); + rgb_to_y(pix, pix, pix, y_val); + y_plane(y + 1, x) = y_val; + + pix = src(y + 1, x + 1); + rgb_to_yuv(pix, pix, pix, y_val, u_val, v_val); + y_plane(y + 1, x + 1) = y_val; + u_plane(y / 2, x / 2) = u_val; + v_plane(y / 2, x / 2) = v_val; +} +#endif + diff --git a/cuda/cudaYUV.h b/cuda/cudaYUV.h new file mode 100644 index 000000000..be2838fe7 --- /dev/null +++ b/cuda/cudaYUV.h @@ -0,0 +1,128 @@ +/* + * inference-101 + */ + +#ifndef __CUDA_YUV_CONVERT_H +#define __CUDA_YUV_CONVERT_H + + +#include "cudaUtility.h" +#include + + +////////////////////////////////////////////////////////////////////////////////// +/// @name RGBA to YUV 4:2:0 planar (I420 & YV12) +////////////////////////////////////////////////////////////////////////////////// + +///@{ + +/** + * Convert an RGBA uchar4 buffer into YUV I420 planar. + */ +cudaError_t cudaRGBAToI420( uchar4* input, uint8_t* output, size_t width, size_t height ); + +/** + * Convert an RGBA uchar4 texture into YUV I420 planar. + */ +cudaError_t cudaRGBAToI420( uchar4* input, size_t inputPitch, uint8_t* output, size_t outputPitch, size_t width, size_t height ); + +/** + * Convert an RGBA uchar4 buffer into YUV YV12 planar. + */ +cudaError_t cudaRGBAToYV12( uchar4* input, uint8_t* output, size_t width, size_t height ); + +/** + * Convert an RGBA uchar4 texture into YUV YV12 planar. + */ +cudaError_t cudaRGBAToYV12( uchar4* input, size_t inputPitch, uint8_t* output, size_t outputPitch, size_t width, size_t height ); + +///@} + + +////////////////////////////////////////////////////////////////////////////////// +/// @name YUV 4:2:2 packed (UYVY & YUYV) to RGBA +////////////////////////////////////////////////////////////////////////////////// + +///@{ + +/** + * Convert a UYVY 422 packed image into RGBA uchar4. + */ +cudaError_t cudaUYVYToRGBA( uchar2* input, uchar4* output, size_t width, size_t height ); + +/** + * Convert a UYVY 422 packed image into RGBA uchar4. + */ +cudaError_t cudaUYVYToRGBA( uchar2* input, size_t inputPitch, uchar4* output, size_t outputPitch, size_t width, size_t height ); + +/** + * Convert a YUYV 422 packed image into RGBA uchar4. + */ +cudaError_t cudaYUYVToRGBA( uchar2* input, uchar4* output, size_t width, size_t height ); + +/** + * Convert a YUYV 422 packed image into RGBA uchar4. + */ +cudaError_t cudaYUYVToRGBA( uchar2* input, size_t inputPitch, uchar4* output, size_t outputPitch, size_t width, size_t height ); + +///@} + + +////////////////////////////////////////////////////////////////////////////////// +/// @name UYUV 4:2:2 packed (UYVY & YUYV) to grayscale +////////////////////////////////////////////////////////////////////////////////// + +///@{ + +/** + * Convert a UYVY 422 packed image into a uint8 grayscale. + */ +cudaError_t cudaUYVYToGray( uchar2* input, float* output, size_t width, size_t height ); + +/** + * Convert a UYVY 422 packed image into a uint8 grayscale. + */ +cudaError_t cudaUYVYToGray( uchar2* input, size_t inputPitch, float* output, size_t outputPitch, size_t width, size_t height ); + +/** + * Convert a YUYV 422 packed image into a uint8 grayscale. + */ +cudaError_t cudaYUYVToGray( uchar2* input, float* output, size_t width, size_t height ); + +/** + * Convert a YUYV 422 packed image into a uint8 grayscale. + */ +cudaError_t cudaYUYVToGray( uchar2* input, size_t inputPitch, float* output, size_t outputPitch, size_t width, size_t height ); + +///@} + + +////////////////////////////////////////////////////////////////////////////////// +/// @name YUV NV12 to RGBA +////////////////////////////////////////////////////////////////////////////////// + +///@{ + +/** + * Convert an NV12 texture (semi-planar 4:2:0) to ARGB uchar4 format. + * NV12 = 8-bit Y plane followed by an interleaved U/V plane with 2x2 subsampling. + */ +cudaError_t cudaNV12ToRGBA( uint8_t* input, size_t inputPitch, uchar4* output, size_t outputPitch, size_t width, size_t height ); +cudaError_t cudaNV12ToRGBA( uint8_t* input, uchar4* output, size_t width, size_t height ); + +cudaError_t cudaNV12ToRGBAf( uint8_t* input, size_t inputPitch, float4* output, size_t outputPitch, size_t width, size_t height ); +cudaError_t cudaNV12ToRGBAf( uint8_t* input, float4* output, size_t width, size_t height ); + +/** + * Setup NV12 color conversion constants. + * cudaNV12SetupColorspace() isn't necessary for the user to call, it will be + * called automatically by cudaNV12ToRGBA() with a hue of 0.0. + * However if you want to setup custom constants (ie with a hue different than 0), + * then you can call cudaNV12SetupColorspace() at any time, overriding the default. + */ +cudaError_t cudaNV12SetupColorspace( float hue = 0.0f ); + +///@} + +#endif + diff --git a/data/images/banana_0.jpg b/data/images/banana_0.jpg new file mode 100644 index 000000000..5c77a2afe Binary files /dev/null and b/data/images/banana_0.jpg differ diff --git a/data/images/granny_smith_0.jpg b/data/images/granny_smith_0.jpg new file mode 100644 index 000000000..78d19219b Binary files /dev/null and b/data/images/granny_smith_0.jpg differ diff --git a/data/images/orange_0.jpg b/data/images/orange_0.jpg new file mode 100644 index 000000000..d1566b796 Binary files /dev/null and b/data/images/orange_0.jpg differ diff --git a/data/images/red_apple_0.jpg b/data/images/red_apple_0.jpg new file mode 100644 index 000000000..3f9033515 Binary files /dev/null and b/data/images/red_apple_0.jpg differ diff --git a/data/networks/alexnet.prototxt b/data/networks/alexnet.prototxt new file mode 100644 index 000000000..45b2b0e36 --- /dev/null +++ b/data/networks/alexnet.prototxt @@ -0,0 +1,277 @@ +name: "AlexNet" +layer { + name: "data" + type: "Input" + top: "data" + input_param { shape: { dim: 10 dim: 3 dim: 227 dim: 227 } } +} +layer { + name: "conv1" + type: "Convolution" + bottom: "data" + top: "conv1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 96 + kernel_size: 11 + stride: 4 + } +} +layer { + name: "relu1" + type: "ReLU" + bottom: "conv1" + top: "conv1" +} +layer { + name: "norm1" + type: "LRN" + bottom: "conv1" + top: "norm1" + lrn_param { + local_size: 5 + alpha: 0.0001 + beta: 0.75 + } +} +layer { + name: "pool1" + type: "Pooling" + bottom: "norm1" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } +} +layer { + name: "conv2" + type: "Convolution" + bottom: "pool1" + top: "conv2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 2 + kernel_size: 5 + group: 2 + } +} +layer { + name: "relu2" + type: "ReLU" + bottom: "conv2" + top: "conv2" +} +layer { + name: "norm2" + type: "LRN" + bottom: "conv2" + top: "norm2" + lrn_param { + local_size: 5 + alpha: 0.0001 + beta: 0.75 + } +} +layer { + name: "pool2" + type: "Pooling" + bottom: "norm2" + top: "pool2" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } +} +layer { + name: "conv3" + type: "Convolution" + bottom: "pool2" + top: "conv3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 384 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu3" + type: "ReLU" + bottom: "conv3" + top: "conv3" +} +layer { + name: "conv4" + type: "Convolution" + bottom: "conv3" + top: "conv4" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 384 + pad: 1 + kernel_size: 3 + group: 2 + } +} +layer { + name: "relu4" + type: "ReLU" + bottom: "conv4" + top: "conv4" +} +layer { + name: "conv5" + type: "Convolution" + bottom: "conv4" + top: "conv5" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + group: 2 + } +} +layer { + name: "relu5" + type: "ReLU" + bottom: "conv5" + top: "conv5" +} +layer { + name: "pool5" + type: "Pooling" + bottom: "conv5" + top: "pool5" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } +} +layer { + name: "fc6" + type: "InnerProduct" + bottom: "pool5" + top: "fc6" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + inner_product_param { + num_output: 4096 + } +} +layer { + name: "relu6" + type: "ReLU" + bottom: "fc6" + top: "fc6" +} +layer { + name: "drop6" + type: "Dropout" + bottom: "fc6" + top: "fc6" + dropout_param { + dropout_ratio: 0.5 + } +} +layer { + name: "fc7" + type: "InnerProduct" + bottom: "fc6" + top: "fc7" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + inner_product_param { + num_output: 4096 + } +} +layer { + name: "relu7" + type: "ReLU" + bottom: "fc7" + top: "fc7" +} +layer { + name: "drop7" + type: "Dropout" + bottom: "fc7" + top: "fc7" + dropout_param { + dropout_ratio: 0.5 + } +} +layer { + name: "fc8" + type: "InnerProduct" + bottom: "fc7" + top: "fc8" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + inner_product_param { + num_output: 1000 + } +} +layer { + name: "prob" + type: "Softmax" + bottom: "fc8" + top: "prob" +} diff --git a/data/networks/googlenet.prototxt b/data/networks/googlenet.prototxt new file mode 100644 index 000000000..0c296a754 --- /dev/null +++ b/data/networks/googlenet.prototxt @@ -0,0 +1,2157 @@ +name: "GoogleNet" +layer { + name: "data" + type: "Input" + top: "data" + input_param { shape: { dim: 1 dim: 3 dim: 224 dim: 224 } } +} +layer { + name: "conv1/7x7_s2" + type: "Convolution" + bottom: "data" + top: "conv1/7x7_s2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 3 + kernel_size: 7 + stride: 2 + weight_filler { + type: "xavier" + std: 0.1 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "conv1/relu_7x7" + type: "ReLU" + bottom: "conv1/7x7_s2" + top: "conv1/7x7_s2" +} +layer { + name: "pool1/3x3_s2" + type: "Pooling" + bottom: "conv1/7x7_s2" + top: "pool1/3x3_s2" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } +} +layer { + name: "pool1/norm1" + type: "LRN" + bottom: "pool1/3x3_s2" + top: "pool1/norm1" + lrn_param { + local_size: 5 + alpha: 0.0001 + beta: 0.75 + } +} +layer { + name: "conv2/3x3_reduce" + type: "Convolution" + bottom: "pool1/norm1" + top: "conv2/3x3_reduce" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.1 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "conv2/relu_3x3_reduce" + type: "ReLU" + bottom: "conv2/3x3_reduce" + top: "conv2/3x3_reduce" +} +layer { + name: "conv2/3x3" + type: "Convolution" + bottom: "conv2/3x3_reduce" + top: "conv2/3x3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 192 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "conv2/relu_3x3" + type: "ReLU" + bottom: "conv2/3x3" + top: "conv2/3x3" +} +layer { + name: "conv2/norm2" + type: "LRN" + bottom: "conv2/3x3" + top: "conv2/norm2" + lrn_param { + local_size: 5 + alpha: 0.0001 + beta: 0.75 + } +} +layer { + name: "pool2/3x3_s2" + type: "Pooling" + bottom: "conv2/norm2" + top: "pool2/3x3_s2" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } +} +layer { + name: "inception_3a/1x1" + type: "Convolution" + bottom: "pool2/3x3_s2" + top: "inception_3a/1x1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_3a/relu_1x1" + type: "ReLU" + bottom: "inception_3a/1x1" + top: "inception_3a/1x1" +} +layer { + name: "inception_3a/3x3_reduce" + type: "Convolution" + bottom: "pool2/3x3_s2" + top: "inception_3a/3x3_reduce" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 96 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.09 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_3a/relu_3x3_reduce" + type: "ReLU" + bottom: "inception_3a/3x3_reduce" + top: "inception_3a/3x3_reduce" +} +layer { + name: "inception_3a/3x3" + type: "Convolution" + bottom: "inception_3a/3x3_reduce" + top: "inception_3a/3x3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_3a/relu_3x3" + type: "ReLU" + bottom: "inception_3a/3x3" + top: "inception_3a/3x3" +} +layer { + name: "inception_3a/5x5_reduce" + type: "Convolution" + bottom: "pool2/3x3_s2" + top: "inception_3a/5x5_reduce" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 16 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.2 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_3a/relu_5x5_reduce" + type: "ReLU" + bottom: "inception_3a/5x5_reduce" + top: "inception_3a/5x5_reduce" +} +layer { + name: "inception_3a/5x5" + type: "Convolution" + bottom: "inception_3a/5x5_reduce" + top: "inception_3a/5x5" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 32 + pad: 2 + kernel_size: 5 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_3a/relu_5x5" + type: "ReLU" + bottom: "inception_3a/5x5" + top: "inception_3a/5x5" +} +layer { + name: "inception_3a/pool" + type: "Pooling" + bottom: "pool2/3x3_s2" + top: "inception_3a/pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "inception_3a/pool_proj" + type: "Convolution" + bottom: "inception_3a/pool" + top: "inception_3a/pool_proj" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 32 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.1 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_3a/relu_pool_proj" + type: "ReLU" + bottom: "inception_3a/pool_proj" + top: "inception_3a/pool_proj" +} +layer { + name: "inception_3a/output" + type: "Concat" + bottom: "inception_3a/1x1" + bottom: "inception_3a/3x3" + bottom: "inception_3a/5x5" + bottom: "inception_3a/pool_proj" + top: "inception_3a/output" +} +layer { + name: "inception_3b/1x1" + type: "Convolution" + bottom: "inception_3a/output" + top: "inception_3b/1x1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_3b/relu_1x1" + type: "ReLU" + bottom: "inception_3b/1x1" + top: "inception_3b/1x1" +} +layer { + name: "inception_3b/3x3_reduce" + type: "Convolution" + bottom: "inception_3a/output" + top: "inception_3b/3x3_reduce" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.09 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_3b/relu_3x3_reduce" + type: "ReLU" + bottom: "inception_3b/3x3_reduce" + top: "inception_3b/3x3_reduce" +} +layer { + name: "inception_3b/3x3" + type: "Convolution" + bottom: "inception_3b/3x3_reduce" + top: "inception_3b/3x3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 192 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_3b/relu_3x3" + type: "ReLU" + bottom: "inception_3b/3x3" + top: "inception_3b/3x3" +} +layer { + name: "inception_3b/5x5_reduce" + type: "Convolution" + bottom: "inception_3a/output" + top: "inception_3b/5x5_reduce" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 32 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.2 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_3b/relu_5x5_reduce" + type: "ReLU" + bottom: "inception_3b/5x5_reduce" + top: "inception_3b/5x5_reduce" +} +layer { + name: "inception_3b/5x5" + type: "Convolution" + bottom: "inception_3b/5x5_reduce" + top: "inception_3b/5x5" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 96 + pad: 2 + kernel_size: 5 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_3b/relu_5x5" + type: "ReLU" + bottom: "inception_3b/5x5" + top: "inception_3b/5x5" +} +layer { + name: "inception_3b/pool" + type: "Pooling" + bottom: "inception_3a/output" + top: "inception_3b/pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "inception_3b/pool_proj" + type: "Convolution" + bottom: "inception_3b/pool" + top: "inception_3b/pool_proj" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.1 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_3b/relu_pool_proj" + type: "ReLU" + bottom: "inception_3b/pool_proj" + top: "inception_3b/pool_proj" +} +layer { + name: "inception_3b/output" + type: "Concat" + bottom: "inception_3b/1x1" + bottom: "inception_3b/3x3" + bottom: "inception_3b/5x5" + bottom: "inception_3b/pool_proj" + top: "inception_3b/output" +} +layer { + name: "pool3/3x3_s2" + type: "Pooling" + bottom: "inception_3b/output" + top: "pool3/3x3_s2" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } +} +layer { + name: "inception_4a/1x1" + type: "Convolution" + bottom: "pool3/3x3_s2" + top: "inception_4a/1x1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 192 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4a/relu_1x1" + type: "ReLU" + bottom: "inception_4a/1x1" + top: "inception_4a/1x1" +} +layer { + name: "inception_4a/3x3_reduce" + type: "Convolution" + bottom: "pool3/3x3_s2" + top: "inception_4a/3x3_reduce" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 96 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.09 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4a/relu_3x3_reduce" + type: "ReLU" + bottom: "inception_4a/3x3_reduce" + top: "inception_4a/3x3_reduce" +} +layer { + name: "inception_4a/3x3" + type: "Convolution" + bottom: "inception_4a/3x3_reduce" + top: "inception_4a/3x3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 208 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4a/relu_3x3" + type: "ReLU" + bottom: "inception_4a/3x3" + top: "inception_4a/3x3" +} +layer { + name: "inception_4a/5x5_reduce" + type: "Convolution" + bottom: "pool3/3x3_s2" + top: "inception_4a/5x5_reduce" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 16 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.2 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4a/relu_5x5_reduce" + type: "ReLU" + bottom: "inception_4a/5x5_reduce" + top: "inception_4a/5x5_reduce" +} +layer { + name: "inception_4a/5x5" + type: "Convolution" + bottom: "inception_4a/5x5_reduce" + top: "inception_4a/5x5" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 48 + pad: 2 + kernel_size: 5 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4a/relu_5x5" + type: "ReLU" + bottom: "inception_4a/5x5" + top: "inception_4a/5x5" +} +layer { + name: "inception_4a/pool" + type: "Pooling" + bottom: "pool3/3x3_s2" + top: "inception_4a/pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "inception_4a/pool_proj" + type: "Convolution" + bottom: "inception_4a/pool" + top: "inception_4a/pool_proj" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.1 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4a/relu_pool_proj" + type: "ReLU" + bottom: "inception_4a/pool_proj" + top: "inception_4a/pool_proj" +} +layer { + name: "inception_4a/output" + type: "Concat" + bottom: "inception_4a/1x1" + bottom: "inception_4a/3x3" + bottom: "inception_4a/5x5" + bottom: "inception_4a/pool_proj" + top: "inception_4a/output" +} +layer { + name: "inception_4b/1x1" + type: "Convolution" + bottom: "inception_4a/output" + top: "inception_4b/1x1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 160 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4b/relu_1x1" + type: "ReLU" + bottom: "inception_4b/1x1" + top: "inception_4b/1x1" +} +layer { + name: "inception_4b/3x3_reduce" + type: "Convolution" + bottom: "inception_4a/output" + top: "inception_4b/3x3_reduce" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 112 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.09 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4b/relu_3x3_reduce" + type: "ReLU" + bottom: "inception_4b/3x3_reduce" + top: "inception_4b/3x3_reduce" +} +layer { + name: "inception_4b/3x3" + type: "Convolution" + bottom: "inception_4b/3x3_reduce" + top: "inception_4b/3x3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 224 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4b/relu_3x3" + type: "ReLU" + bottom: "inception_4b/3x3" + top: "inception_4b/3x3" +} +layer { + name: "inception_4b/5x5_reduce" + type: "Convolution" + bottom: "inception_4a/output" + top: "inception_4b/5x5_reduce" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 24 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.2 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4b/relu_5x5_reduce" + type: "ReLU" + bottom: "inception_4b/5x5_reduce" + top: "inception_4b/5x5_reduce" +} +layer { + name: "inception_4b/5x5" + type: "Convolution" + bottom: "inception_4b/5x5_reduce" + top: "inception_4b/5x5" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 2 + kernel_size: 5 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4b/relu_5x5" + type: "ReLU" + bottom: "inception_4b/5x5" + top: "inception_4b/5x5" +} +layer { + name: "inception_4b/pool" + type: "Pooling" + bottom: "inception_4a/output" + top: "inception_4b/pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "inception_4b/pool_proj" + type: "Convolution" + bottom: "inception_4b/pool" + top: "inception_4b/pool_proj" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.1 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4b/relu_pool_proj" + type: "ReLU" + bottom: "inception_4b/pool_proj" + top: "inception_4b/pool_proj" +} +layer { + name: "inception_4b/output" + type: "Concat" + bottom: "inception_4b/1x1" + bottom: "inception_4b/3x3" + bottom: "inception_4b/5x5" + bottom: "inception_4b/pool_proj" + top: "inception_4b/output" +} +layer { + name: "inception_4c/1x1" + type: "Convolution" + bottom: "inception_4b/output" + top: "inception_4c/1x1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4c/relu_1x1" + type: "ReLU" + bottom: "inception_4c/1x1" + top: "inception_4c/1x1" +} +layer { + name: "inception_4c/3x3_reduce" + type: "Convolution" + bottom: "inception_4b/output" + top: "inception_4c/3x3_reduce" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.09 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4c/relu_3x3_reduce" + type: "ReLU" + bottom: "inception_4c/3x3_reduce" + top: "inception_4c/3x3_reduce" +} +layer { + name: "inception_4c/3x3" + type: "Convolution" + bottom: "inception_4c/3x3_reduce" + top: "inception_4c/3x3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4c/relu_3x3" + type: "ReLU" + bottom: "inception_4c/3x3" + top: "inception_4c/3x3" +} +layer { + name: "inception_4c/5x5_reduce" + type: "Convolution" + bottom: "inception_4b/output" + top: "inception_4c/5x5_reduce" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 24 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.2 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4c/relu_5x5_reduce" + type: "ReLU" + bottom: "inception_4c/5x5_reduce" + top: "inception_4c/5x5_reduce" +} +layer { + name: "inception_4c/5x5" + type: "Convolution" + bottom: "inception_4c/5x5_reduce" + top: "inception_4c/5x5" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 2 + kernel_size: 5 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4c/relu_5x5" + type: "ReLU" + bottom: "inception_4c/5x5" + top: "inception_4c/5x5" +} +layer { + name: "inception_4c/pool" + type: "Pooling" + bottom: "inception_4b/output" + top: "inception_4c/pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "inception_4c/pool_proj" + type: "Convolution" + bottom: "inception_4c/pool" + top: "inception_4c/pool_proj" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.1 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4c/relu_pool_proj" + type: "ReLU" + bottom: "inception_4c/pool_proj" + top: "inception_4c/pool_proj" +} +layer { + name: "inception_4c/output" + type: "Concat" + bottom: "inception_4c/1x1" + bottom: "inception_4c/3x3" + bottom: "inception_4c/5x5" + bottom: "inception_4c/pool_proj" + top: "inception_4c/output" +} +layer { + name: "inception_4d/1x1" + type: "Convolution" + bottom: "inception_4c/output" + top: "inception_4d/1x1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 112 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4d/relu_1x1" + type: "ReLU" + bottom: "inception_4d/1x1" + top: "inception_4d/1x1" +} +layer { + name: "inception_4d/3x3_reduce" + type: "Convolution" + bottom: "inception_4c/output" + top: "inception_4d/3x3_reduce" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 144 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.09 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4d/relu_3x3_reduce" + type: "ReLU" + bottom: "inception_4d/3x3_reduce" + top: "inception_4d/3x3_reduce" +} +layer { + name: "inception_4d/3x3" + type: "Convolution" + bottom: "inception_4d/3x3_reduce" + top: "inception_4d/3x3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 288 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4d/relu_3x3" + type: "ReLU" + bottom: "inception_4d/3x3" + top: "inception_4d/3x3" +} +layer { + name: "inception_4d/5x5_reduce" + type: "Convolution" + bottom: "inception_4c/output" + top: "inception_4d/5x5_reduce" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 32 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.2 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4d/relu_5x5_reduce" + type: "ReLU" + bottom: "inception_4d/5x5_reduce" + top: "inception_4d/5x5_reduce" +} +layer { + name: "inception_4d/5x5" + type: "Convolution" + bottom: "inception_4d/5x5_reduce" + top: "inception_4d/5x5" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 2 + kernel_size: 5 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4d/relu_5x5" + type: "ReLU" + bottom: "inception_4d/5x5" + top: "inception_4d/5x5" +} +layer { + name: "inception_4d/pool" + type: "Pooling" + bottom: "inception_4c/output" + top: "inception_4d/pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "inception_4d/pool_proj" + type: "Convolution" + bottom: "inception_4d/pool" + top: "inception_4d/pool_proj" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.1 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4d/relu_pool_proj" + type: "ReLU" + bottom: "inception_4d/pool_proj" + top: "inception_4d/pool_proj" +} +layer { + name: "inception_4d/output" + type: "Concat" + bottom: "inception_4d/1x1" + bottom: "inception_4d/3x3" + bottom: "inception_4d/5x5" + bottom: "inception_4d/pool_proj" + top: "inception_4d/output" +} +layer { + name: "inception_4e/1x1" + type: "Convolution" + bottom: "inception_4d/output" + top: "inception_4e/1x1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4e/relu_1x1" + type: "ReLU" + bottom: "inception_4e/1x1" + top: "inception_4e/1x1" +} +layer { + name: "inception_4e/3x3_reduce" + type: "Convolution" + bottom: "inception_4d/output" + top: "inception_4e/3x3_reduce" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 160 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.09 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4e/relu_3x3_reduce" + type: "ReLU" + bottom: "inception_4e/3x3_reduce" + top: "inception_4e/3x3_reduce" +} +layer { + name: "inception_4e/3x3" + type: "Convolution" + bottom: "inception_4e/3x3_reduce" + top: "inception_4e/3x3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 320 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4e/relu_3x3" + type: "ReLU" + bottom: "inception_4e/3x3" + top: "inception_4e/3x3" +} +layer { + name: "inception_4e/5x5_reduce" + type: "Convolution" + bottom: "inception_4d/output" + top: "inception_4e/5x5_reduce" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 32 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.2 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4e/relu_5x5_reduce" + type: "ReLU" + bottom: "inception_4e/5x5_reduce" + top: "inception_4e/5x5_reduce" +} +layer { + name: "inception_4e/5x5" + type: "Convolution" + bottom: "inception_4e/5x5_reduce" + top: "inception_4e/5x5" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 2 + kernel_size: 5 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4e/relu_5x5" + type: "ReLU" + bottom: "inception_4e/5x5" + top: "inception_4e/5x5" +} +layer { + name: "inception_4e/pool" + type: "Pooling" + bottom: "inception_4d/output" + top: "inception_4e/pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "inception_4e/pool_proj" + type: "Convolution" + bottom: "inception_4e/pool" + top: "inception_4e/pool_proj" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.1 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4e/relu_pool_proj" + type: "ReLU" + bottom: "inception_4e/pool_proj" + top: "inception_4e/pool_proj" +} +layer { + name: "inception_4e/output" + type: "Concat" + bottom: "inception_4e/1x1" + bottom: "inception_4e/3x3" + bottom: "inception_4e/5x5" + bottom: "inception_4e/pool_proj" + top: "inception_4e/output" +} +layer { + name: "pool4/3x3_s2" + type: "Pooling" + bottom: "inception_4e/output" + top: "pool4/3x3_s2" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } +} +layer { + name: "inception_5a/1x1" + type: "Convolution" + bottom: "pool4/3x3_s2" + top: "inception_5a/1x1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_5a/relu_1x1" + type: "ReLU" + bottom: "inception_5a/1x1" + top: "inception_5a/1x1" +} +layer { + name: "inception_5a/3x3_reduce" + type: "Convolution" + bottom: "pool4/3x3_s2" + top: "inception_5a/3x3_reduce" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 160 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.09 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_5a/relu_3x3_reduce" + type: "ReLU" + bottom: "inception_5a/3x3_reduce" + top: "inception_5a/3x3_reduce" +} +layer { + name: "inception_5a/3x3" + type: "Convolution" + bottom: "inception_5a/3x3_reduce" + top: "inception_5a/3x3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 320 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_5a/relu_3x3" + type: "ReLU" + bottom: "inception_5a/3x3" + top: "inception_5a/3x3" +} +layer { + name: "inception_5a/5x5_reduce" + type: "Convolution" + bottom: "pool4/3x3_s2" + top: "inception_5a/5x5_reduce" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 32 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.2 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_5a/relu_5x5_reduce" + type: "ReLU" + bottom: "inception_5a/5x5_reduce" + top: "inception_5a/5x5_reduce" +} +layer { + name: "inception_5a/5x5" + type: "Convolution" + bottom: "inception_5a/5x5_reduce" + top: "inception_5a/5x5" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 2 + kernel_size: 5 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_5a/relu_5x5" + type: "ReLU" + bottom: "inception_5a/5x5" + top: "inception_5a/5x5" +} +layer { + name: "inception_5a/pool" + type: "Pooling" + bottom: "pool4/3x3_s2" + top: "inception_5a/pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "inception_5a/pool_proj" + type: "Convolution" + bottom: "inception_5a/pool" + top: "inception_5a/pool_proj" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.1 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_5a/relu_pool_proj" + type: "ReLU" + bottom: "inception_5a/pool_proj" + top: "inception_5a/pool_proj" +} +layer { + name: "inception_5a/output" + type: "Concat" + bottom: "inception_5a/1x1" + bottom: "inception_5a/3x3" + bottom: "inception_5a/5x5" + bottom: "inception_5a/pool_proj" + top: "inception_5a/output" +} +layer { + name: "inception_5b/1x1" + type: "Convolution" + bottom: "inception_5a/output" + top: "inception_5b/1x1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 384 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_5b/relu_1x1" + type: "ReLU" + bottom: "inception_5b/1x1" + top: "inception_5b/1x1" +} +layer { + name: "inception_5b/3x3_reduce" + type: "Convolution" + bottom: "inception_5a/output" + top: "inception_5b/3x3_reduce" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 192 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.09 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_5b/relu_3x3_reduce" + type: "ReLU" + bottom: "inception_5b/3x3_reduce" + top: "inception_5b/3x3_reduce" +} +layer { + name: "inception_5b/3x3" + type: "Convolution" + bottom: "inception_5b/3x3_reduce" + top: "inception_5b/3x3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 384 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_5b/relu_3x3" + type: "ReLU" + bottom: "inception_5b/3x3" + top: "inception_5b/3x3" +} +layer { + name: "inception_5b/5x5_reduce" + type: "Convolution" + bottom: "inception_5a/output" + top: "inception_5b/5x5_reduce" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 48 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.2 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_5b/relu_5x5_reduce" + type: "ReLU" + bottom: "inception_5b/5x5_reduce" + top: "inception_5b/5x5_reduce" +} +layer { + name: "inception_5b/5x5" + type: "Convolution" + bottom: "inception_5b/5x5_reduce" + top: "inception_5b/5x5" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 2 + kernel_size: 5 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_5b/relu_5x5" + type: "ReLU" + bottom: "inception_5b/5x5" + top: "inception_5b/5x5" +} +layer { + name: "inception_5b/pool" + type: "Pooling" + bottom: "inception_5a/output" + top: "inception_5b/pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "inception_5b/pool_proj" + type: "Convolution" + bottom: "inception_5b/pool" + top: "inception_5b/pool_proj" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.1 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_5b/relu_pool_proj" + type: "ReLU" + bottom: "inception_5b/pool_proj" + top: "inception_5b/pool_proj" +} +layer { + name: "inception_5b/output" + type: "Concat" + bottom: "inception_5b/1x1" + bottom: "inception_5b/3x3" + bottom: "inception_5b/5x5" + bottom: "inception_5b/pool_proj" + top: "inception_5b/output" +} +layer { + name: "pool5/7x7_s1" + type: "Pooling" + bottom: "inception_5b/output" + top: "pool5/7x7_s1" + pooling_param { + pool: AVE + kernel_size: 7 + stride: 1 + } +} +layer { + name: "pool5/drop_7x7_s1" + type: "Dropout" + bottom: "pool5/7x7_s1" + top: "pool5/7x7_s1" + dropout_param { + dropout_ratio: 0.4 + } +} +layer { + name: "loss3/classifier" + type: "InnerProduct" + bottom: "pool5/7x7_s1" + top: "loss3/classifier" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + inner_product_param { + num_output: 1000 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "prob" + type: "Softmax" + bottom: "loss3/classifier" + top: "prob" +} diff --git a/data/networks/ilsvrc12_synset_words.txt b/data/networks/ilsvrc12_synset_words.txt new file mode 100644 index 000000000..a9e8c7f50 --- /dev/null +++ b/data/networks/ilsvrc12_synset_words.txt @@ -0,0 +1,1000 @@ +n01440764 tench, Tinca tinca +n01443537 goldfish, Carassius auratus +n01484850 great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias +n01491361 tiger shark, Galeocerdo cuvieri +n01494475 hammerhead, hammerhead shark +n01496331 electric ray, crampfish, numbfish, torpedo +n01498041 stingray +n01514668 cock +n01514859 hen +n01518878 ostrich, Struthio camelus +n01530575 brambling, Fringilla montifringilla +n01531178 goldfinch, Carduelis carduelis +n01532829 house finch, linnet, Carpodacus mexicanus +n01534433 junco, snowbird +n01537544 indigo bunting, indigo finch, indigo bird, Passerina cyanea +n01558993 robin, American robin, Turdus migratorius +n01560419 bulbul +n01580077 jay +n01582220 magpie +n01592084 chickadee +n01601694 water ouzel, dipper +n01608432 kite +n01614925 bald eagle, American eagle, Haliaeetus leucocephalus +n01616318 vulture +n01622779 great grey owl, great gray owl, Strix nebulosa +n01629819 European fire salamander, Salamandra salamandra +n01630670 common newt, Triturus vulgaris +n01631663 eft +n01632458 spotted salamander, Ambystoma maculatum +n01632777 axolotl, mud puppy, Ambystoma mexicanum +n01641577 bullfrog, Rana catesbeiana +n01644373 tree frog, tree-frog +n01644900 tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui +n01664065 loggerhead, loggerhead turtle, Caretta caretta +n01665541 leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea +n01667114 mud turtle +n01667778 terrapin +n01669191 box turtle, box tortoise +n01675722 banded gecko +n01677366 common iguana, iguana, Iguana iguana +n01682714 American chameleon, anole, Anolis carolinensis +n01685808 whiptail, whiptail lizard +n01687978 agama +n01688243 frilled lizard, Chlamydosaurus kingi +n01689811 alligator lizard +n01692333 Gila monster, Heloderma suspectum +n01693334 green lizard, Lacerta viridis +n01694178 African chameleon, Chamaeleo chamaeleon +n01695060 Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis +n01697457 African crocodile, Nile crocodile, Crocodylus niloticus +n01698640 American alligator, Alligator mississipiensis +n01704323 triceratops +n01728572 thunder snake, worm snake, Carphophis amoenus +n01728920 ringneck snake, ring-necked snake, ring snake +n01729322 hognose snake, puff adder, sand viper +n01729977 green snake, grass snake +n01734418 king snake, kingsnake +n01735189 garter snake, grass snake +n01737021 water snake +n01739381 vine snake +n01740131 night snake, Hypsiglena torquata +n01742172 boa constrictor, Constrictor constrictor +n01744401 rock python, rock snake, Python sebae +n01748264 Indian cobra, Naja naja +n01749939 green mamba +n01751748 sea snake +n01753488 horned viper, cerastes, sand viper, horned asp, Cerastes cornutus +n01755581 diamondback, diamondback rattlesnake, Crotalus adamanteus +n01756291 sidewinder, horned rattlesnake, Crotalus cerastes +n01768244 trilobite +n01770081 harvestman, daddy longlegs, Phalangium opilio +n01770393 scorpion +n01773157 black and gold garden spider, Argiope aurantia +n01773549 barn spider, Araneus cavaticus +n01773797 garden spider, Aranea diademata +n01774384 black widow, Latrodectus mactans +n01774750 tarantula +n01775062 wolf spider, hunting spider +n01776313 tick +n01784675 centipede +n01795545 black grouse +n01796340 ptarmigan +n01797886 ruffed grouse, partridge, Bonasa umbellus +n01798484 prairie chicken, prairie grouse, prairie fowl +n01806143 peacock +n01806567 quail +n01807496 partridge +n01817953 African grey, African gray, Psittacus erithacus +n01818515 macaw +n01819313 sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita +n01820546 lorikeet +n01824575 coucal +n01828970 bee eater +n01829413 hornbill +n01833805 hummingbird +n01843065 jacamar +n01843383 toucan +n01847000 drake +n01855032 red-breasted merganser, Mergus serrator +n01855672 goose +n01860187 black swan, Cygnus atratus +n01871265 tusker +n01872401 echidna, spiny anteater, anteater +n01873310 platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus +n01877812 wallaby, brush kangaroo +n01882714 koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus +n01883070 wombat +n01910747 jellyfish +n01914609 sea anemone, anemone +n01917289 brain coral +n01924916 flatworm, platyhelminth +n01930112 nematode, nematode worm, roundworm +n01943899 conch +n01944390 snail +n01945685 slug +n01950731 sea slug, nudibranch +n01955084 chiton, coat-of-mail shell, sea cradle, polyplacophore +n01968897 chambered nautilus, pearly nautilus, nautilus +n01978287 Dungeness crab, Cancer magister +n01978455 rock crab, Cancer irroratus +n01980166 fiddler crab +n01981276 king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica +n01983481 American lobster, Northern lobster, Maine lobster, Homarus americanus +n01984695 spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish +n01985128 crayfish, crawfish, crawdad, crawdaddy +n01986214 hermit crab +n01990800 isopod +n02002556 white stork, Ciconia ciconia +n02002724 black stork, Ciconia nigra +n02006656 spoonbill +n02007558 flamingo +n02009229 little blue heron, Egretta caerulea +n02009912 American egret, great white heron, Egretta albus +n02011460 bittern +n02012849 crane +n02013706 limpkin, Aramus pictus +n02017213 European gallinule, Porphyrio porphyrio +n02018207 American coot, marsh hen, mud hen, water hen, Fulica americana +n02018795 bustard +n02025239 ruddy turnstone, Arenaria interpres +n02027492 red-backed sandpiper, dunlin, Erolia alpina +n02028035 redshank, Tringa totanus +n02033041 dowitcher +n02037110 oystercatcher, oyster catcher +n02051845 pelican +n02056570 king penguin, Aptenodytes patagonica +n02058221 albatross, mollymawk +n02066245 grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus +n02071294 killer whale, killer, orca, grampus, sea wolf, Orcinus orca +n02074367 dugong, Dugong dugon +n02077923 sea lion +n02085620 Chihuahua +n02085782 Japanese spaniel +n02085936 Maltese dog, Maltese terrier, Maltese +n02086079 Pekinese, Pekingese, Peke +n02086240 Shih-Tzu +n02086646 Blenheim spaniel +n02086910 papillon +n02087046 toy terrier +n02087394 Rhodesian ridgeback +n02088094 Afghan hound, Afghan +n02088238 basset, basset hound +n02088364 beagle +n02088466 bloodhound, sleuthhound +n02088632 bluetick +n02089078 black-and-tan coonhound +n02089867 Walker hound, Walker foxhound +n02089973 English foxhound +n02090379 redbone +n02090622 borzoi, Russian wolfhound +n02090721 Irish wolfhound +n02091032 Italian greyhound +n02091134 whippet +n02091244 Ibizan hound, Ibizan Podenco +n02091467 Norwegian elkhound, elkhound +n02091635 otterhound, otter hound +n02091831 Saluki, gazelle hound +n02092002 Scottish deerhound, deerhound +n02092339 Weimaraner +n02093256 Staffordshire bullterrier, Staffordshire bull terrier +n02093428 American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier +n02093647 Bedlington terrier +n02093754 Border terrier +n02093859 Kerry blue terrier +n02093991 Irish terrier +n02094114 Norfolk terrier +n02094258 Norwich terrier +n02094433 Yorkshire terrier +n02095314 wire-haired fox terrier +n02095570 Lakeland terrier +n02095889 Sealyham terrier, Sealyham +n02096051 Airedale, Airedale terrier +n02096177 cairn, cairn terrier +n02096294 Australian terrier +n02096437 Dandie Dinmont, Dandie Dinmont terrier +n02096585 Boston bull, Boston terrier +n02097047 miniature schnauzer +n02097130 giant schnauzer +n02097209 standard schnauzer +n02097298 Scotch terrier, Scottish terrier, Scottie +n02097474 Tibetan terrier, chrysanthemum dog +n02097658 silky terrier, Sydney silky +n02098105 soft-coated wheaten terrier +n02098286 West Highland white terrier +n02098413 Lhasa, Lhasa apso +n02099267 flat-coated retriever +n02099429 curly-coated retriever +n02099601 golden retriever +n02099712 Labrador retriever +n02099849 Chesapeake Bay retriever +n02100236 German short-haired pointer +n02100583 vizsla, Hungarian pointer +n02100735 English setter +n02100877 Irish setter, red setter +n02101006 Gordon setter +n02101388 Brittany spaniel +n02101556 clumber, clumber spaniel +n02102040 English springer, English springer spaniel +n02102177 Welsh springer spaniel +n02102318 cocker spaniel, English cocker spaniel, cocker +n02102480 Sussex spaniel +n02102973 Irish water spaniel +n02104029 kuvasz +n02104365 schipperke +n02105056 groenendael +n02105162 malinois +n02105251 briard +n02105412 kelpie +n02105505 komondor +n02105641 Old English sheepdog, bobtail +n02105855 Shetland sheepdog, Shetland sheep dog, Shetland +n02106030 collie +n02106166 Border collie +n02106382 Bouvier des Flandres, Bouviers des Flandres +n02106550 Rottweiler +n02106662 German shepherd, German shepherd dog, German police dog, alsatian +n02107142 Doberman, Doberman pinscher +n02107312 miniature pinscher +n02107574 Greater Swiss Mountain dog +n02107683 Bernese mountain dog +n02107908 Appenzeller +n02108000 EntleBucher +n02108089 boxer +n02108422 bull mastiff +n02108551 Tibetan mastiff +n02108915 French bulldog +n02109047 Great Dane +n02109525 Saint Bernard, St Bernard +n02109961 Eskimo dog, husky +n02110063 malamute, malemute, Alaskan malamute +n02110185 Siberian husky +n02110341 dalmatian, coach dog, carriage dog +n02110627 affenpinscher, monkey pinscher, monkey dog +n02110806 basenji +n02110958 pug, pug-dog +n02111129 Leonberg +n02111277 Newfoundland, Newfoundland dog +n02111500 Great Pyrenees +n02111889 Samoyed, Samoyede +n02112018 Pomeranian +n02112137 chow, chow chow +n02112350 keeshond +n02112706 Brabancon griffon +n02113023 Pembroke, Pembroke Welsh corgi +n02113186 Cardigan, Cardigan Welsh corgi +n02113624 toy poodle +n02113712 miniature poodle +n02113799 standard poodle +n02113978 Mexican hairless +n02114367 timber wolf, grey wolf, gray wolf, Canis lupus +n02114548 white wolf, Arctic wolf, Canis lupus tundrarum +n02114712 red wolf, maned wolf, Canis rufus, Canis niger +n02114855 coyote, prairie wolf, brush wolf, Canis latrans +n02115641 dingo, warrigal, warragal, Canis dingo +n02115913 dhole, Cuon alpinus +n02116738 African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus +n02117135 hyena, hyaena +n02119022 red fox, Vulpes vulpes +n02119789 kit fox, Vulpes macrotis +n02120079 Arctic fox, white fox, Alopex lagopus +n02120505 grey fox, gray fox, Urocyon cinereoargenteus +n02123045 tabby, tabby cat +n02123159 tiger cat +n02123394 Persian cat +n02123597 Siamese cat, Siamese +n02124075 Egyptian cat +n02125311 cougar, puma, catamount, mountain lion, painter, panther, Felis concolor +n02127052 lynx, catamount +n02128385 leopard, Panthera pardus +n02128757 snow leopard, ounce, Panthera uncia +n02128925 jaguar, panther, Panthera onca, Felis onca +n02129165 lion, king of beasts, Panthera leo +n02129604 tiger, Panthera tigris +n02130308 cheetah, chetah, Acinonyx jubatus +n02132136 brown bear, bruin, Ursus arctos +n02133161 American black bear, black bear, Ursus americanus, Euarctos americanus +n02134084 ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus +n02134418 sloth bear, Melursus ursinus, Ursus ursinus +n02137549 mongoose +n02138441 meerkat, mierkat +n02165105 tiger beetle +n02165456 ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle +n02167151 ground beetle, carabid beetle +n02168699 long-horned beetle, longicorn, longicorn beetle +n02169497 leaf beetle, chrysomelid +n02172182 dung beetle +n02174001 rhinoceros beetle +n02177972 weevil +n02190166 fly +n02206856 bee +n02219486 ant, emmet, pismire +n02226429 grasshopper, hopper +n02229544 cricket +n02231487 walking stick, walkingstick, stick insect +n02233338 cockroach, roach +n02236044 mantis, mantid +n02256656 cicada, cicala +n02259212 leafhopper +n02264363 lacewing, lacewing fly +n02268443 dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk +n02268853 damselfly +n02276258 admiral +n02277742 ringlet, ringlet butterfly +n02279972 monarch, monarch butterfly, milkweed butterfly, Danaus plexippus +n02280649 cabbage butterfly +n02281406 sulphur butterfly, sulfur butterfly +n02281787 lycaenid, lycaenid butterfly +n02317335 starfish, sea star +n02319095 sea urchin +n02321529 sea cucumber, holothurian +n02325366 wood rabbit, cottontail, cottontail rabbit +n02326432 hare +n02328150 Angora, Angora rabbit +n02342885 hamster +n02346627 porcupine, hedgehog +n02356798 fox squirrel, eastern fox squirrel, Sciurus niger +n02361337 marmot +n02363005 beaver +n02364673 guinea pig, Cavia cobaya +n02389026 sorrel +n02391049 zebra +n02395406 hog, pig, grunter, squealer, Sus scrofa +n02396427 wild boar, boar, Sus scrofa +n02397096 warthog +n02398521 hippopotamus, hippo, river horse, Hippopotamus amphibius +n02403003 ox +n02408429 water buffalo, water ox, Asiatic buffalo, Bubalus bubalis +n02410509 bison +n02412080 ram, tup +n02415577 bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis +n02417914 ibex, Capra ibex +n02422106 hartebeest +n02422699 impala, Aepyceros melampus +n02423022 gazelle +n02437312 Arabian camel, dromedary, Camelus dromedarius +n02437616 llama +n02441942 weasel +n02442845 mink +n02443114 polecat, fitch, foulmart, foumart, Mustela putorius +n02443484 black-footed ferret, ferret, Mustela nigripes +n02444819 otter +n02445715 skunk, polecat, wood pussy +n02447366 badger +n02454379 armadillo +n02457408 three-toed sloth, ai, Bradypus tridactylus +n02480495 orangutan, orang, orangutang, Pongo pygmaeus +n02480855 gorilla, Gorilla gorilla +n02481823 chimpanzee, chimp, Pan troglodytes +n02483362 gibbon, Hylobates lar +n02483708 siamang, Hylobates syndactylus, Symphalangus syndactylus +n02484975 guenon, guenon monkey +n02486261 patas, hussar monkey, Erythrocebus patas +n02486410 baboon +n02487347 macaque +n02488291 langur +n02488702 colobus, colobus monkey +n02489166 proboscis monkey, Nasalis larvatus +n02490219 marmoset +n02492035 capuchin, ringtail, Cebus capucinus +n02492660 howler monkey, howler +n02493509 titi, titi monkey +n02493793 spider monkey, Ateles geoffroyi +n02494079 squirrel monkey, Saimiri sciureus +n02497673 Madagascar cat, ring-tailed lemur, Lemur catta +n02500267 indri, indris, Indri indri, Indri brevicaudatus +n02504013 Indian elephant, Elephas maximus +n02504458 African elephant, Loxodonta africana +n02509815 lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens +n02510455 giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca +n02514041 barracouta, snoek +n02526121 eel +n02536864 coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch +n02606052 rock beauty, Holocanthus tricolor +n02607072 anemone fish +n02640242 sturgeon +n02641379 gar, garfish, garpike, billfish, Lepisosteus osseus +n02643566 lionfish +n02655020 puffer, pufferfish, blowfish, globefish +n02666196 abacus +n02667093 abaya +n02669723 academic gown, academic robe, judge's robe +n02672831 accordion, piano accordion, squeeze box +n02676566 acoustic guitar +n02687172 aircraft carrier, carrier, flattop, attack aircraft carrier +n02690373 airliner +n02692877 airship, dirigible +n02699494 altar +n02701002 ambulance +n02704792 amphibian, amphibious vehicle +n02708093 analog clock +n02727426 apiary, bee house +n02730930 apron +n02747177 ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin +n02749479 assault rifle, assault gun +n02769748 backpack, back pack, knapsack, packsack, rucksack, haversack +n02776631 bakery, bakeshop, bakehouse +n02777292 balance beam, beam +n02782093 balloon +n02783161 ballpoint, ballpoint pen, ballpen, Biro +n02786058 Band Aid +n02787622 banjo +n02788148 bannister, banister, balustrade, balusters, handrail +n02790996 barbell +n02791124 barber chair +n02791270 barbershop +n02793495 barn +n02794156 barometer +n02795169 barrel, cask +n02797295 barrow, garden cart, lawn cart, wheelbarrow +n02799071 baseball +n02802426 basketball +n02804414 bassinet +n02804610 bassoon +n02807133 bathing cap, swimming cap +n02808304 bath towel +n02808440 bathtub, bathing tub, bath, tub +n02814533 beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon +n02814860 beacon, lighthouse, beacon light, pharos +n02815834 beaker +n02817516 bearskin, busby, shako +n02823428 beer bottle +n02823750 beer glass +n02825657 bell cote, bell cot +n02834397 bib +n02835271 bicycle-built-for-two, tandem bicycle, tandem +n02837789 bikini, two-piece +n02840245 binder, ring-binder +n02841315 binoculars, field glasses, opera glasses +n02843684 birdhouse +n02859443 boathouse +n02860847 bobsled, bobsleigh, bob +n02865351 bolo tie, bolo, bola tie, bola +n02869837 bonnet, poke bonnet +n02870880 bookcase +n02871525 bookshop, bookstore, bookstall +n02877765 bottlecap +n02879718 bow +n02883205 bow tie, bow-tie, bowtie +n02892201 brass, memorial tablet, plaque +n02892767 brassiere, bra, bandeau +n02894605 breakwater, groin, groyne, mole, bulwark, seawall, jetty +n02895154 breastplate, aegis, egis +n02906734 broom +n02909870 bucket, pail +n02910353 buckle +n02916936 bulletproof vest +n02917067 bullet train, bullet +n02927161 butcher shop, meat market +n02930766 cab, hack, taxi, taxicab +n02939185 caldron, cauldron +n02948072 candle, taper, wax light +n02950826 cannon +n02951358 canoe +n02951585 can opener, tin opener +n02963159 cardigan +n02965783 car mirror +n02966193 carousel, carrousel, merry-go-round, roundabout, whirligig +n02966687 carpenter's kit, tool kit +n02971356 carton +n02974003 car wheel +n02977058 cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM +n02978881 cassette +n02979186 cassette player +n02980441 castle +n02981792 catamaran +n02988304 CD player +n02992211 cello, violoncello +n02992529 cellular telephone, cellular phone, cellphone, cell, mobile phone +n02999410 chain +n03000134 chainlink fence +n03000247 chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour +n03000684 chain saw, chainsaw +n03014705 chest +n03016953 chiffonier, commode +n03017168 chime, bell, gong +n03018349 china cabinet, china closet +n03026506 Christmas stocking +n03028079 church, church building +n03032252 cinema, movie theater, movie theatre, movie house, picture palace +n03041632 cleaver, meat cleaver, chopper +n03042490 cliff dwelling +n03045698 cloak +n03047690 clog, geta, patten, sabot +n03062245 cocktail shaker +n03063599 coffee mug +n03063689 coffeepot +n03065424 coil, spiral, volute, whorl, helix +n03075370 combination lock +n03085013 computer keyboard, keypad +n03089624 confectionery, confectionary, candy store +n03095699 container ship, containership, container vessel +n03100240 convertible +n03109150 corkscrew, bottle screw +n03110669 cornet, horn, trumpet, trump +n03124043 cowboy boot +n03124170 cowboy hat, ten-gallon hat +n03125729 cradle +n03126707 crane +n03127747 crash helmet +n03127925 crate +n03131574 crib, cot +n03133878 Crock Pot +n03134739 croquet ball +n03141823 crutch +n03146219 cuirass +n03160309 dam, dike, dyke +n03179701 desk +n03180011 desktop computer +n03187595 dial telephone, dial phone +n03188531 diaper, nappy, napkin +n03196217 digital clock +n03197337 digital watch +n03201208 dining table, board +n03207743 dishrag, dishcloth +n03207941 dishwasher, dish washer, dishwashing machine +n03208938 disk brake, disc brake +n03216828 dock, dockage, docking facility +n03218198 dogsled, dog sled, dog sleigh +n03220513 dome +n03223299 doormat, welcome mat +n03240683 drilling platform, offshore rig +n03249569 drum, membranophone, tympan +n03250847 drumstick +n03255030 dumbbell +n03259280 Dutch oven +n03271574 electric fan, blower +n03272010 electric guitar +n03272562 electric locomotive +n03290653 entertainment center +n03291819 envelope +n03297495 espresso maker +n03314780 face powder +n03325584 feather boa, boa +n03337140 file, file cabinet, filing cabinet +n03344393 fireboat +n03345487 fire engine, fire truck +n03347037 fire screen, fireguard +n03355925 flagpole, flagstaff +n03372029 flute, transverse flute +n03376595 folding chair +n03379051 football helmet +n03384352 forklift +n03388043 fountain +n03388183 fountain pen +n03388549 four-poster +n03393912 freight car +n03394916 French horn, horn +n03400231 frying pan, frypan, skillet +n03404251 fur coat +n03417042 garbage truck, dustcart +n03424325 gasmask, respirator, gas helmet +n03425413 gas pump, gasoline pump, petrol pump, island dispenser +n03443371 goblet +n03444034 go-kart +n03445777 golf ball +n03445924 golfcart, golf cart +n03447447 gondola +n03447721 gong, tam-tam +n03450230 gown +n03452741 grand piano, grand +n03457902 greenhouse, nursery, glasshouse +n03459775 grille, radiator grille +n03461385 grocery store, grocery, food market, market +n03467068 guillotine +n03476684 hair slide +n03476991 hair spray +n03478589 half track +n03481172 hammer +n03482405 hamper +n03483316 hand blower, blow dryer, blow drier, hair dryer, hair drier +n03485407 hand-held computer, hand-held microcomputer +n03485794 handkerchief, hankie, hanky, hankey +n03492542 hard disc, hard disk, fixed disk +n03494278 harmonica, mouth organ, harp, mouth harp +n03495258 harp +n03496892 harvester, reaper +n03498962 hatchet +n03527444 holster +n03529860 home theater, home theatre +n03530642 honeycomb +n03532672 hook, claw +n03534580 hoopskirt, crinoline +n03535780 horizontal bar, high bar +n03538406 horse cart, horse-cart +n03544143 hourglass +n03584254 iPod +n03584829 iron, smoothing iron +n03590841 jack-o'-lantern +n03594734 jean, blue jean, denim +n03594945 jeep, landrover +n03595614 jersey, T-shirt, tee shirt +n03598930 jigsaw puzzle +n03599486 jinrikisha, ricksha, rickshaw +n03602883 joystick +n03617480 kimono +n03623198 knee pad +n03627232 knot +n03630383 lab coat, laboratory coat +n03633091 ladle +n03637318 lampshade, lamp shade +n03642806 laptop, laptop computer +n03649909 lawn mower, mower +n03657121 lens cap, lens cover +n03658185 letter opener, paper knife, paperknife +n03661043 library +n03662601 lifeboat +n03666591 lighter, light, igniter, ignitor +n03670208 limousine, limo +n03673027 liner, ocean liner +n03676483 lipstick, lip rouge +n03680355 Loafer +n03690938 lotion +n03691459 loudspeaker, speaker, speaker unit, loudspeaker system, speaker system +n03692522 loupe, jeweler's loupe +n03697007 lumbermill, sawmill +n03706229 magnetic compass +n03709823 mailbag, postbag +n03710193 mailbox, letter box +n03710637 maillot +n03710721 maillot, tank suit +n03717622 manhole cover +n03720891 maraca +n03721384 marimba, xylophone +n03724870 mask +n03729826 matchstick +n03733131 maypole +n03733281 maze, labyrinth +n03733805 measuring cup +n03742115 medicine chest, medicine cabinet +n03743016 megalith, megalithic structure +n03759954 microphone, mike +n03761084 microwave, microwave oven +n03763968 military uniform +n03764736 milk can +n03769881 minibus +n03770439 miniskirt, mini +n03770679 minivan +n03773504 missile +n03775071 mitten +n03775546 mixing bowl +n03776460 mobile home, manufactured home +n03777568 Model T +n03777754 modem +n03781244 monastery +n03782006 monitor +n03785016 moped +n03786901 mortar +n03787032 mortarboard +n03788195 mosque +n03788365 mosquito net +n03791053 motor scooter, scooter +n03792782 mountain bike, all-terrain bike, off-roader +n03792972 mountain tent +n03793489 mouse, computer mouse +n03794056 mousetrap +n03796401 moving van +n03803284 muzzle +n03804744 nail +n03814639 neck brace +n03814906 necklace +n03825788 nipple +n03832673 notebook, notebook computer +n03837869 obelisk +n03838899 oboe, hautboy, hautbois +n03840681 ocarina, sweet potato +n03841143 odometer, hodometer, mileometer, milometer +n03843555 oil filter +n03854065 organ, pipe organ +n03857828 oscilloscope, scope, cathode-ray oscilloscope, CRO +n03866082 overskirt +n03868242 oxcart +n03868863 oxygen mask +n03871628 packet +n03873416 paddle, boat paddle +n03874293 paddlewheel, paddle wheel +n03874599 padlock +n03876231 paintbrush +n03877472 pajama, pyjama, pj's, jammies +n03877845 palace +n03884397 panpipe, pandean pipe, syrinx +n03887697 paper towel +n03888257 parachute, chute +n03888605 parallel bars, bars +n03891251 park bench +n03891332 parking meter +n03895866 passenger car, coach, carriage +n03899768 patio, terrace +n03902125 pay-phone, pay-station +n03903868 pedestal, plinth, footstall +n03908618 pencil box, pencil case +n03908714 pencil sharpener +n03916031 perfume, essence +n03920288 Petri dish +n03924679 photocopier +n03929660 pick, plectrum, plectron +n03929855 pickelhaube +n03930313 picket fence, paling +n03930630 pickup, pickup truck +n03933933 pier +n03935335 piggy bank, penny bank +n03937543 pill bottle +n03938244 pillow +n03942813 ping-pong ball +n03944341 pinwheel +n03947888 pirate, pirate ship +n03950228 pitcher, ewer +n03954731 plane, carpenter's plane, woodworking plane +n03956157 planetarium +n03958227 plastic bag +n03961711 plate rack +n03967562 plow, plough +n03970156 plunger, plumber's helper +n03976467 Polaroid camera, Polaroid Land camera +n03976657 pole +n03977966 police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria +n03980874 poncho +n03982430 pool table, billiard table, snooker table +n03983396 pop bottle, soda bottle +n03991062 pot, flowerpot +n03992509 potter's wheel +n03995372 power drill +n03998194 prayer rug, prayer mat +n04004767 printer +n04005630 prison, prison house +n04008634 projectile, missile +n04009552 projector +n04019541 puck, hockey puck +n04023962 punching bag, punch bag, punching ball, punchball +n04026417 purse +n04033901 quill, quill pen +n04033995 quilt, comforter, comfort, puff +n04037443 racer, race car, racing car +n04039381 racket, racquet +n04040759 radiator +n04041544 radio, wireless +n04044716 radio telescope, radio reflector +n04049303 rain barrel +n04065272 recreational vehicle, RV, R.V. +n04067472 reel +n04069434 reflex camera +n04070727 refrigerator, icebox +n04074963 remote control, remote +n04081281 restaurant, eating house, eating place, eatery +n04086273 revolver, six-gun, six-shooter +n04090263 rifle +n04099969 rocking chair, rocker +n04111531 rotisserie +n04116512 rubber eraser, rubber, pencil eraser +n04118538 rugby ball +n04118776 rule, ruler +n04120489 running shoe +n04125021 safe +n04127249 safety pin +n04131690 saltshaker, salt shaker +n04133789 sandal +n04136333 sarong +n04141076 sax, saxophone +n04141327 scabbard +n04141975 scale, weighing machine +n04146614 school bus +n04147183 schooner +n04149813 scoreboard +n04152593 screen, CRT screen +n04153751 screw +n04154565 screwdriver +n04162706 seat belt, seatbelt +n04179913 sewing machine +n04192698 shield, buckler +n04200800 shoe shop, shoe-shop, shoe store +n04201297 shoji +n04204238 shopping basket +n04204347 shopping cart +n04208210 shovel +n04209133 shower cap +n04209239 shower curtain +n04228054 ski +n04229816 ski mask +n04235860 sleeping bag +n04238763 slide rule, slipstick +n04239074 sliding door +n04243546 slot, one-armed bandit +n04251144 snorkel +n04252077 snowmobile +n04252225 snowplow, snowplough +n04254120 soap dispenser +n04254680 soccer ball +n04254777 sock +n04258138 solar dish, solar collector, solar furnace +n04259630 sombrero +n04263257 soup bowl +n04264628 space bar +n04265275 space heater +n04266014 space shuttle +n04270147 spatula +n04273569 speedboat +n04275548 spider web, spider's web +n04277352 spindle +n04285008 sports car, sport car +n04286575 spotlight, spot +n04296562 stage +n04310018 steam locomotive +n04311004 steel arch bridge +n04311174 steel drum +n04317175 stethoscope +n04325704 stole +n04326547 stone wall +n04328186 stopwatch, stop watch +n04330267 stove +n04332243 strainer +n04335435 streetcar, tram, tramcar, trolley, trolley car +n04336792 stretcher +n04344873 studio couch, day bed +n04346328 stupa, tope +n04347754 submarine, pigboat, sub, U-boat +n04350905 suit, suit of clothes +n04355338 sundial +n04355933 sunglass +n04356056 sunglasses, dark glasses, shades +n04357314 sunscreen, sunblock, sun blocker +n04366367 suspension bridge +n04367480 swab, swob, mop +n04370456 sweatshirt +n04371430 swimming trunks, bathing trunks +n04371774 swing +n04372370 switch, electric switch, electrical switch +n04376876 syringe +n04380533 table lamp +n04389033 tank, army tank, armored combat vehicle, armoured combat vehicle +n04392985 tape player +n04398044 teapot +n04399382 teddy, teddy bear +n04404412 television, television system +n04409515 tennis ball +n04417672 thatch, thatched roof +n04418357 theater curtain, theatre curtain +n04423845 thimble +n04428191 thresher, thrasher, threshing machine +n04429376 throne +n04435653 tile roof +n04442312 toaster +n04443257 tobacco shop, tobacconist shop, tobacconist +n04447861 toilet seat +n04456115 torch +n04458633 totem pole +n04461696 tow truck, tow car, wrecker +n04462240 toyshop +n04465501 tractor +n04467665 trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi +n04476259 tray +n04479046 trench coat +n04482393 tricycle, trike, velocipede +n04483307 trimaran +n04485082 tripod +n04486054 triumphal arch +n04487081 trolleybus, trolley coach, trackless trolley +n04487394 trombone +n04493381 tub, vat +n04501370 turnstile +n04505470 typewriter keyboard +n04507155 umbrella +n04509417 unicycle, monocycle +n04515003 upright, upright piano +n04517823 vacuum, vacuum cleaner +n04522168 vase +n04523525 vault +n04525038 velvet +n04525305 vending machine +n04532106 vestment +n04532670 viaduct +n04536866 violin, fiddle +n04540053 volleyball +n04542943 waffle iron +n04548280 wall clock +n04548362 wallet, billfold, notecase, pocketbook +n04550184 wardrobe, closet, press +n04552348 warplane, military plane +n04553703 washbasin, handbasin, washbowl, lavabo, wash-hand basin +n04554684 washer, automatic washer, washing machine +n04557648 water bottle +n04560804 water jug +n04562935 water tower +n04579145 whiskey jug +n04579432 whistle +n04584207 wig +n04589890 window screen +n04590129 window shade +n04591157 Windsor tie +n04591713 wine bottle +n04592741 wing +n04596742 wok +n04597913 wooden spoon +n04599235 wool, woolen, woollen +n04604644 worm fence, snake fence, snake-rail fence, Virginia fence +n04606251 wreck +n04612504 yawl +n04613696 yurt +n06359193 web site, website, internet site, site +n06596364 comic book +n06785654 crossword puzzle, crossword +n06794110 street sign +n06874185 traffic light, traffic signal, stoplight +n07248320 book jacket, dust cover, dust jacket, dust wrapper +n07565083 menu +n07579787 plate +n07583066 guacamole +n07584110 consomme +n07590611 hot pot, hotpot +n07613480 trifle +n07614500 ice cream, icecream +n07615774 ice lolly, lolly, lollipop, popsicle +n07684084 French loaf +n07693725 bagel, beigel +n07695742 pretzel +n07697313 cheeseburger +n07697537 hotdog, hot dog, red hot +n07711569 mashed potato +n07714571 head cabbage +n07714990 broccoli +n07715103 cauliflower +n07716358 zucchini, courgette +n07716906 spaghetti squash +n07717410 acorn squash +n07717556 butternut squash +n07718472 cucumber, cuke +n07718747 artichoke, globe artichoke +n07720875 bell pepper +n07730033 cardoon +n07734744 mushroom +n07742313 Granny Smith +n07745940 strawberry +n07747607 orange +n07749582 lemon +n07753113 fig +n07753275 pineapple, ananas +n07753592 banana +n07754684 jackfruit, jak, jack +n07760859 custard apple +n07768694 pomegranate +n07802026 hay +n07831146 carbonara +n07836838 chocolate sauce, chocolate syrup +n07860988 dough +n07871810 meat loaf, meatloaf +n07873807 pizza, pizza pie +n07875152 potpie +n07880968 burrito +n07892512 red wine +n07920052 espresso +n07930864 cup +n07932039 eggnog +n09193705 alp +n09229709 bubble +n09246464 cliff, drop, drop-off +n09256479 coral reef +n09288635 geyser +n09332890 lakeside, lakeshore +n09399592 promontory, headland, head, foreland +n09421951 sandbar, sand bar +n09428293 seashore, coast, seacoast, sea-coast +n09468604 valley, vale +n09472597 volcano +n09835506 ballplayer, baseball player +n10148035 groom, bridegroom +n10565667 scuba diver +n11879895 rapeseed +n11939491 daisy +n12057211 yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum +n12144580 corn +n12267677 acorn +n12620546 hip, rose hip, rosehip +n12768682 buckeye, horse chestnut, conker +n12985857 coral fungus +n12998815 agaric +n13037406 gyromitra +n13040303 stinkhorn, carrion fungus +n13044778 earthstar +n13052670 hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa +n13054560 bolete +n13133613 ear, spike, capitulum +n15075141 toilet tissue, toilet paper, bathroom tissue diff --git a/display/glDisplay.cpp b/display/glDisplay.cpp new file mode 100644 index 000000000..b4de4f3f8 --- /dev/null +++ b/display/glDisplay.cpp @@ -0,0 +1,332 @@ +/* + * inference-101 + */ + +#include "glDisplay.h" + + + +// Constructor +glDisplay::glDisplay() +{ + mWindowX = 0; + mScreenX = NULL; + mVisualX = NULL; + mContextGL = NULL; + mDisplayX = NULL; + mWidth = 0; + mHeight = 0; + mAvgTime = 1.0f; + + clock_gettime(CLOCK_REALTIME, &mLastTime); +} + + +// Destructor +glDisplay::~glDisplay() +{ + glXDestroyContext(mDisplayX, mContextGL); +} + + +// Create +glDisplay* glDisplay::Create() +{ + glDisplay* vp = new glDisplay(); + + if( !vp ) + return NULL; + + if( !vp->initWindow() ) + { + printf("[OpenGL] failed to create X11 Window.\n"); + delete vp; + return NULL; + } + + if( !vp->initGL() ) + { + printf("[OpenGL] failed to initialize OpenGL.\n"); + delete vp; + return NULL; + } + + GLenum err = glewInit(); + + if (GLEW_OK != err) + { + printf("[OpenGL] GLEW Error: %s\n", glewGetErrorString(err)); + delete vp; + return NULL; + } + + printf("[OpenGL] glDisplay display window initialized\n"); + return vp; +} + + +// initWindow +bool glDisplay::initWindow() +{ + if( !mDisplayX ) + mDisplayX = XOpenDisplay(0); + + if( !mDisplayX ) + { + printf( "[OpenGL] failed to open X11 server connection." ); + return false; + } + + + if( !mDisplayX ) + { + printf( "InitWindow() - no X11 server connection." ); + return false; + } + + // retrieve screen info + const int screenIdx = DefaultScreen(mDisplayX); + const int screenWidth = DisplayWidth(mDisplayX, screenIdx); + const int screenHeight = DisplayHeight(mDisplayX, screenIdx); + + printf("default X screen %i: %i x %i\n", screenIdx, screenWidth, screenHeight); + + Screen* screen = XScreenOfDisplay(mDisplayX, screenIdx); + + if( !screen ) + { + printf("failed to retrieve default Screen instance\n"); + return false; + } + + Window winRoot = XRootWindowOfScreen(screen); + + // get framebuffer format + static int fbAttribs[] = + { + GLX_X_RENDERABLE, True, + GLX_DRAWABLE_TYPE, GLX_WINDOW_BIT, + GLX_RENDER_TYPE, GLX_RGBA_BIT, + GLX_X_VISUAL_TYPE, GLX_TRUE_COLOR, + GLX_RED_SIZE, 8, + GLX_GREEN_SIZE, 8, + GLX_BLUE_SIZE, 8, + GLX_ALPHA_SIZE, 8, + GLX_DEPTH_SIZE, 24, + GLX_STENCIL_SIZE, 8, + GLX_DOUBLEBUFFER, True, + GLX_SAMPLE_BUFFERS, 0, + GLX_SAMPLES, 0, + None + }; + + int fbCount = 0; + GLXFBConfig* fbConfig = glXChooseFBConfig(mDisplayX, screenIdx, fbAttribs, &fbCount); + + if( !fbConfig || fbCount == 0 ) + return false; + + // get a 'visual' + XVisualInfo* visual = glXGetVisualFromFBConfig(mDisplayX, fbConfig[0]); + + if( !visual ) + return false; + + // populate windows attributes + XSetWindowAttributes winAttr; + winAttr.colormap = XCreateColormap(mDisplayX, winRoot, visual->visual, AllocNone); + winAttr.background_pixmap = None; + winAttr.border_pixel = 0; + winAttr.event_mask = StructureNotifyMask|KeyPressMask|KeyReleaseMask|PointerMotionMask|ButtonPressMask|ButtonReleaseMask; + + + // create window + Window win = XCreateWindow(mDisplayX, winRoot, 0, 0, screenWidth, screenHeight, 0, + visual->depth, InputOutput, visual->visual, CWBorderPixel|CWColormap|CWEventMask, &winAttr); + + if( !win ) + return false; + + XStoreName(mDisplayX, win, "NVIDIA Jetson TX1 | L4T R24.1 aarch64 | Ubuntu 14.04 LTS"); + XMapWindow(mDisplayX, win); + + // cleanup + mWindowX = win; + mScreenX = screen; + mVisualX = visual; + mWidth = screenWidth; + mHeight = screenHeight; + + XFree(fbConfig); + return true; +} + + +void glDisplay::SetTitle( const char* str ) +{ + XStoreName(mDisplayX, mWindowX, str); +} + +// initGL +bool glDisplay::initGL() +{ + mContextGL = glXCreateContext(mDisplayX, mVisualX, 0, True); + + if( !mContextGL ) + return false; + + GL(glXMakeCurrent(mDisplayX, mWindowX, mContextGL)); + + return true; +} + + +// MakeCurrent +void glDisplay::BeginRender() +{ + GL(glXMakeCurrent(mDisplayX, mWindowX, mContextGL)); + + GL(glClearColor(0.05f, 0.05f, 0.05f, 1.0f)); + GL(glClear(GL_COLOR_BUFFER_BIT|GL_DEPTH_BUFFER_BIT|GL_STENCIL_BUFFER_BIT)); + + GL(glViewport(0, 0, mWidth, mHeight)); + GL(glMatrixMode(GL_PROJECTION)); + GL(glLoadIdentity()); + GL(glOrtho(0.0f, mWidth, mHeight, 0.0f, 0.0f, 1.0f)); +} + + +// timeDiff +static timespec timeDiff( const timespec& start, const timespec& end) +{ + timespec temp; + if ((end.tv_nsec-start.tv_nsec)<0) { + temp.tv_sec = end.tv_sec-start.tv_sec-1; + temp.tv_nsec = 1000000000+end.tv_nsec-start.tv_nsec; + } else { + temp.tv_sec = end.tv_sec-start.tv_sec; + temp.tv_nsec = end.tv_nsec-start.tv_nsec; + } + return temp; +} + + +// Refresh +void glDisplay::EndRender() +{ + glXSwapBuffers(mDisplayX, mWindowX); + + // measure framerate + timespec currTime; + clock_gettime(CLOCK_REALTIME, &currTime); + + const timespec diffTime = timeDiff(mLastTime, currTime); + const float ns = 1000000000 * diffTime.tv_sec + diffTime.tv_nsec; + + mAvgTime = mAvgTime * 0.8f + ns * 0.2f; + mLastTime = currTime; +} + + +#define MOUSE_MOVE 0 +#define MOUSE_BUTTON 1 +#define MOUSE_WHEEL 2 +#define MOUSE_DOUBLE 3 +#define KEY_STATE 4 +#define KEY_CHAR 5 + + +// OnEvent +void glDisplay::onEvent( uint msg, int a, int b ) +{ + switch(msg) + { + case MOUSE_MOVE: + { + //mMousePos.Set(a,b); + break; + } + case MOUSE_BUTTON: + { + /*if( mMouseButton[a] != (bool)b ) + { + mMouseButton[a] = b; + + if( b ) + mMouseDownEvent = true; + + // ignore right-mouse up events + if( !(a == 1 && !b) ) + mMouseEvent = true; + }*/ + + break; + } + case MOUSE_DOUBLE: + { + /*mMouseDblClick = b; + + if( b ) + { + mMouseEvent = true; + mMouseDownEvent = true; + }*/ + + break; + } + case MOUSE_WHEEL: + { + //mMouseWheel = a; + break; + } + case KEY_STATE: + { + //mKeys[a] = b; + break; + } + case KEY_CHAR: + { + //mKeyText = a; + break; + } + } + + //if( msg == MOUSE_MOVE || msg == MOUSE_BUTTON || msg == MOUSE_DOUBLE || msg == MOUSE_WHEEL ) + // mMouseEventLast = time(); +} + + +// UserEvents() +void glDisplay::UserEvents() +{ + // reset input states + /*mMouseEvent = false; + mMouseDownEvent = false; + mMouseDblClick = false; + mMouseWheel = 0; + mKeyText = 0;*/ + + + XEvent evt; + + while( XEventsQueued(mDisplayX, QueuedAlready) > 0 ) + { + XNextEvent(mDisplayX, &evt); + + switch( evt.type ) + { + case KeyPress: onEvent(KEY_STATE, evt.xkey.keycode, 1); break; + case KeyRelease: onEvent(KEY_STATE, evt.xkey.keycode, 0); break; + case ButtonPress: onEvent(MOUSE_BUTTON, evt.xbutton.button, 1); break; + case ButtonRelease: onEvent(MOUSE_BUTTON, evt.xbutton.button, 0); break; + case MotionNotify: + { + XWindowAttributes attr; + XGetWindowAttributes(mDisplayX, evt.xmotion.root, &attr); + onEvent(MOUSE_MOVE, evt.xmotion.x_root + attr.x, evt.xmotion.y_root + attr.y); + break; + } + } + } +} + diff --git a/display/glDisplay.h b/display/glDisplay.h new file mode 100644 index 000000000..53f78d63c --- /dev/null +++ b/display/glDisplay.h @@ -0,0 +1,83 @@ +/* + * inference-101 + */ + +#ifndef __GL_VIEWPORT_H__ +#define __GL_VIEWPORT_H__ + + +#include "glUtility.h" +#include "glTexture.h" + +#include + + +/** + * OpenGL display window / video viewer + */ +class glDisplay +{ +public: + /** + * Create a new maximized openGL display window. + */ + static glDisplay* Create(); + + /** + * Destroy window + */ + ~glDisplay(); + + /** + * Clear window and begin rendering a frame. + */ + void BeginRender(); + + /** + * Finish rendering and refresh / flip the backbuffer. + */ + void EndRender(); + + /** + * Process UI events. + */ + void UserEvents(); + + /** + * UI event handler. + */ + void onEvent( uint msg, int a, int b ); + + /** + * Set the window title string. + */ + void SetTitle( const char* str ); + + /** + * Get the average frame time (in milliseconds). + */ + inline float GetFPS() { return 1000000000.0f / mAvgTime; } + +protected: + glDisplay(); + + bool initWindow(); + bool initGL(); + + static const int screenIdx = 0; + + Display* mDisplayX; + Screen* mScreenX; + XVisualInfo* mVisualX; + Window mWindowX; + GLXContext mContextGL; + + uint32_t mWidth; + uint32_t mHeight; + + timespec mLastTime; + float mAvgTime; +}; + +#endif + diff --git a/display/glTexture.cpp b/display/glTexture.cpp new file mode 100644 index 000000000..5ee373d4c --- /dev/null +++ b/display/glTexture.cpp @@ -0,0 +1,377 @@ +/* + * inference-101 + */ + +#include "glUtility.h" +#include "glTexture.h" + +#include "cudaMappedMemory.h" + + +//----------------------------------------------------------------------------------- +inline uint32_t glTextureLayout( uint32_t format ) +{ + switch(format) + { + case GL_LUMINANCE8: + case GL_LUMINANCE16: + case GL_LUMINANCE32UI_EXT: + case GL_LUMINANCE8I_EXT: + case GL_LUMINANCE16I_EXT: + case GL_LUMINANCE32I_EXT: + case GL_LUMINANCE16F_ARB: + case GL_LUMINANCE32F_ARB: return GL_LUMINANCE; + + case GL_LUMINANCE8_ALPHA8: + case GL_LUMINANCE16_ALPHA16: + case GL_LUMINANCE_ALPHA32UI_EXT: + case GL_LUMINANCE_ALPHA8I_EXT: + case GL_LUMINANCE_ALPHA16I_EXT: + case GL_LUMINANCE_ALPHA32I_EXT: + case GL_LUMINANCE_ALPHA16F_ARB: + case GL_LUMINANCE_ALPHA32F_ARB: return GL_LUMINANCE_ALPHA; + + case GL_RGB8: + case GL_RGB16: + case GL_RGB32UI: + case GL_RGB8I: + case GL_RGB16I: + case GL_RGB32I: + case GL_RGB16F_ARB: + case GL_RGB32F_ARB: return GL_RGB; + + case GL_RGBA8: + case GL_RGBA16: + case GL_RGBA32UI: + case GL_RGBA8I: + case GL_RGBA16I: + case GL_RGBA32I: + //case GL_RGBA_FLOAT32: + case GL_RGBA16F_ARB: + case GL_RGBA32F_ARB: return GL_RGBA; + } + + return 0; +} + + +inline uint32_t glTextureLayoutChannels( uint32_t format ) +{ + const uint layout = glTextureLayout(format); + + switch(layout) + { + case GL_LUMINANCE: return 1; + case GL_LUMINANCE_ALPHA: return 2; + case GL_RGB: return 3; + case GL_RGBA: return 4; + } + + return 0; +} + + +inline uint32_t glTextureType( uint32_t format ) +{ + switch(format) + { + case GL_LUMINANCE8: + case GL_LUMINANCE8_ALPHA8: + case GL_RGB8: + case GL_RGBA8: return GL_UNSIGNED_BYTE; + + case GL_LUMINANCE16: + case GL_LUMINANCE16_ALPHA16: + case GL_RGB16: + case GL_RGBA16: return GL_UNSIGNED_SHORT; + + case GL_LUMINANCE32UI_EXT: + case GL_LUMINANCE_ALPHA32UI_EXT: + case GL_RGB32UI: + case GL_RGBA32UI: return GL_UNSIGNED_INT; + + case GL_LUMINANCE8I_EXT: + case GL_LUMINANCE_ALPHA8I_EXT: + case GL_RGB8I: + case GL_RGBA8I: return GL_BYTE; + + case GL_LUMINANCE16I_EXT: + case GL_LUMINANCE_ALPHA16I_EXT: + case GL_RGB16I: + case GL_RGBA16I: return GL_SHORT; + + case GL_LUMINANCE32I_EXT: + case GL_LUMINANCE_ALPHA32I_EXT: + case GL_RGB32I: + case GL_RGBA32I: return GL_INT; + + + case GL_LUMINANCE16F_ARB: + case GL_LUMINANCE_ALPHA16F_ARB: + case GL_RGB16F_ARB: + case GL_RGBA16F_ARB: return GL_FLOAT; + + case GL_LUMINANCE32F_ARB: + case GL_LUMINANCE_ALPHA32F_ARB: + //case GL_RGBA_FLOAT32: + case GL_RGB32F_ARB: + case GL_RGBA32F_ARB: return GL_FLOAT; + } + + return 0; +} + + +inline uint glTextureTypeSize( uint32_t format ) +{ + const uint type = glTextureType(format); + + switch(type) + { + case GL_UNSIGNED_BYTE: + case GL_BYTE: return 1; + + case GL_UNSIGNED_SHORT: + case GL_SHORT: return 2; + + case GL_UNSIGNED_INT: + case GL_INT: + case GL_FLOAT: return 4; + } + + return 0; +} +//----------------------------------------------------------------------------------- + +// constructor +glTexture::glTexture() +{ + mID = 0; + mDMA = 0; + mWidth = 0; + mHeight = 0; + mFormat = 0; + mSize = 0; + + mInteropCUDA = NULL; + mInteropHost = NULL; + mInteropDevice = NULL; +} + + +// destructor +glTexture::~glTexture() +{ + GL(glDeleteTextures(1, &mID)); +} + + +// Create +glTexture* glTexture::Create( uint32_t width, uint32_t height, uint32_t format, void* data ) +{ + glTexture* tex = new glTexture(); + + if( !tex->init(width, height, format, data) ) + { + printf("[OpenGL] failed to create %ux%u texture\n", width, height); + return NULL; + } + + return tex; +} + + +// Alloc +bool glTexture::init( uint32_t width, uint32_t height, uint32_t format, void* data ) +{ + const uint32_t size = width * height * glTextureLayoutChannels(format) * glTextureTypeSize(format); + + if( size == 0 ) + return NULL; + + // generate texture objects + uint32_t id = 0; + + GL(glEnable(GL_TEXTURE_2D)); + GL(glGenTextures(1, &id)); + GL(glBindTexture(GL_TEXTURE_2D, id)); + + // set default texture parameters + GL(glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE)); + GL(glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE)); + GL(glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR)); + GL(glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR)); + + printf("[OpenGL] creating %ux%u texture\n", width, height); + + // allocate texture + GL_VERIFYN(glTexImage2D(GL_TEXTURE_2D, 0, format, width, height, 0, glTextureLayout(format), glTextureType(format), data)); + GL(glBindTexture(GL_TEXTURE_2D, 0)); + + // allocate DMA PBO + uint32_t dma = 0; + + GL(glGenBuffers(1, &dma)); + GL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, dma)); + GL(glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, size, NULL, GL_DYNAMIC_DRAW_ARB)); + GL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0)); + + + mID = id; + mDMA = dma; + mWidth = width; + mHeight = height; + mFormat = format; + mSize = size; + return true; +} + + +// MapCUDA +void* glTexture::MapCUDA() +{ + if( !mInteropCUDA ) + { + if( CUDA_FAILED(cudaGraphicsGLRegisterBuffer(&mInteropCUDA, mDMA, cudaGraphicsRegisterFlagsWriteDiscard)) ) + return NULL; + + printf( "[cuda] registered %u byte openGL texture for interop access (%ux%u)\n", mSize, mWidth, mHeight); + } + + if( CUDA_FAILED(cudaGraphicsMapResources(1, &mInteropCUDA)) ) + return NULL; + + void* devPtr = NULL; + size_t mappedSize = 0; + + if( CUDA_FAILED(cudaGraphicsResourceGetMappedPointer(&devPtr, &mappedSize, mInteropCUDA)) ) + { + CUDA(cudaGraphicsUnmapResources(1, &mInteropCUDA)); + return NULL; + } + + if( mSize != mappedSize ) + printf("[OpenGL] glTexture::MapCUDA() -- size mismatch %zu bytes (expected=%u)\n", mappedSize, mSize); + + return devPtr; +} + + +// Unmap +void glTexture::Unmap() +{ + if( !mInteropCUDA ) + return; + + CUDA(cudaGraphicsUnmapResources(1, &mInteropCUDA)); + + GL(glEnable(GL_TEXTURE_2D)); + GL(glBindTexture(GL_TEXTURE_2D, mID)); + GL(glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, mDMA)); + GL(glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, mWidth, mHeight, glTextureLayout(mFormat), glTextureType(mFormat), NULL)); + + GL(glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0)); + GL(glBindTexture(GL_TEXTURE_2D, 0)); + GL(glDisable(GL_TEXTURE_2D)); +} + + +// Upload +bool glTexture::UploadCPU( void* data ) +{ + // activate texture & pbo + GL(glEnable(GL_TEXTURE_2D)); + GL(glActiveTextureARB(GL_TEXTURE0_ARB)); + GL(glBindTexture(GL_TEXTURE_2D, mID)); + GL(glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0)); + GL(glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, mDMA)); + + //GL(glPixelStorei(GL_UNPACK_ALIGNMENT, 1)); + //GL(glPixelStorei(GL_UNPACK_ROW_LENGTH, img->GetWidth())); + //GL(glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, img->GetHeight())); + + // hint to driver to double-buffer + // glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, mImage->GetSize(), NULL, GL_STREAM_DRAW_ARB); + + // map PBO + GLubyte* ptr = (GLubyte*)glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_WRITE_ONLY_ARB); + + if( !ptr ) + { + GL_CHECK("glMapBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, GL_WRITE_ONLY_ARB)"); + return NULL; + } + + memcpy(ptr, data, mSize); + + GL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB)); + + //GL(glEnable(GL_TEXTURE_2D)); + //GL(glBindTexture(GL_TEXTURE_2D, mID)); + //GL(glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, mDMA)); + GL(glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, mWidth, mHeight, glTextureLayout(mFormat), glTextureType(mFormat), NULL)); + + GL(glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0)); + GL(glBindTexture(GL_TEXTURE_2D, 0)); + GL(glDisable(GL_TEXTURE_2D)); + + /*if( !mInteropHost || !mInteropDevice ) + { + if( !cudaAllocMapped(&mInteropHost, &mInteropDevice, mSize) ) + return false; + } + + memcpy(mInteropHost, data, mSize); + + void* devGL = MapCUDA(); + + if( !devGL ) + return false; + + CUDA(cudaMemcpy(devGL, mInteropDevice, mSize, cudaMemcpyDeviceToDevice)); + Unmap();*/ + + return true; +} + + +// Render +void glTexture::Render( const float4& rect ) +{ + GL(glEnable(GL_TEXTURE_2D)); + GL(glBindTexture(GL_TEXTURE_2D, mID)); + + glBegin(GL_QUADS); + + glColor4f(1.0f,1.0f,1.0f,1.0f); + + glTexCoord2f(0.0f, 0.0f); + glVertex2d(rect.x, rect.y); + + glTexCoord2f(1.0f, 0.0f); + glVertex2d(rect.z, rect.y); + + glTexCoord2f(1.0f, 1.0f); + glVertex2d(rect.z, rect.w); + + glTexCoord2f(0.0f, 1.0f); + glVertex2d(rect.x, rect.w); + + glEnd(); + + GL(glBindTexture(GL_TEXTURE_2D, 0)); +} + + + +void glTexture::Render( float x, float y ) +{ + Render(x, y, mWidth, mHeight); +} + +void glTexture::Render( float x, float y, float width, float height ) +{ + Render(make_float4(x, y, x + width, y + height)); +} + + diff --git a/display/glTexture.h b/display/glTexture.h new file mode 100644 index 000000000..db7c43269 --- /dev/null +++ b/display/glTexture.h @@ -0,0 +1,54 @@ +/* + * inference-101 + */ + +#ifndef __GL_TEXTURE_H__ +#define __GL_TEXTURE_H__ + + +#include "cudaUtility.h" +#include "cuda_gl_interop.h" + + +/** + * OpenGL texture + */ +class glTexture +{ +public: + static glTexture* Create( uint32_t width, uint32_t height, uint32_t format, void* data=NULL ); + ~glTexture(); + + void Render( float x, float y ); + void Render( float x, float y, float width, float height ); + void Render( const float4& rect ); + + inline uint32_t GetID() const { return mID; } + inline uint32_t GetWidth() const { return mWidth; } + inline uint32_t GetHeight() const { return mHeight; } + inline uint32_t GetFormat() const { return mFormat; } + inline uint32_t GetSize() const { return mSize; } + + void* MapCUDA(); + void Unmap(); + + bool UploadCPU( void* data ); + +private: + glTexture(); + bool init(uint32_t width, uint32_t height, uint32_t format, void* data); + + uint32_t mID; + uint32_t mDMA; + uint32_t mWidth; + uint32_t mHeight; + uint32_t mFormat; + uint32_t mSize; + + cudaGraphicsResource* mInteropCUDA; + void* mInteropHost; + void* mInteropDevice; +}; + + +#endif diff --git a/display/glUtility.h b/display/glUtility.h new file mode 100644 index 000000000..9dc0d336b --- /dev/null +++ b/display/glUtility.h @@ -0,0 +1,125 @@ +/* + * inference-101 + */ + +#ifndef __OPENGL_UTILITY_H +#define __OPENGL_UTILITY_H + + +#include +#include + +#include + + +/** + * LOG_GL printf prefix. + * @ingroup renderGL + */ +#define LOG_GL "[openGL] " + + + +#define GL(x) { x; glCheckError( #x, __FILE__, __LINE__ ); } +#define GL_VERIFY(x) { x; if(glCheckError( #x, __FILE__, __LINE__ )) return false; } +#define GL_VERIFYN(x) { x; if(glCheckError( #x, __FILE__, __LINE__ )) return NULL; } +#define GL_CHECK(msg) { glCheckError(msg, __FILE__, __LINE__); } + + +/** + * openGL error logging macros + * @ingroup renderGL + */ +inline bool glCheckError(const char* msg, const char* file, int line) +{ + GLenum err = glGetError(); + + if( err == GL_NO_ERROR ) + return false; + + const char* e = NULL; + + switch(err) + { + case GL_INVALID_ENUM: e = "invalid enum"; break; + case GL_INVALID_VALUE: e = "invalid value"; break; + case GL_INVALID_OPERATION: e = "invalid operation"; break; + case GL_STACK_OVERFLOW: e = "stack overflow"; break; + case GL_STACK_UNDERFLOW: e = "stack underflow"; break; + case GL_OUT_OF_MEMORY: e = "out of memory"; break; + #ifdef GL_TABLE_TOO_LARGE_EXT + case GL_TABLE_TOO_LARGE_EXT: e = "table too large"; break; + #endif + #ifdef GL_TEXTURE_TOO_LARGE_EXT + case GL_TEXTURE_TOO_LARGE_EXT: e = "texture too large"; break; + #endif + default: e = "unknown error"; + } + + printf(LOG_GL "Error %i - '%s'\n", (uint)err, e); + printf(LOG_GL " %s::%i\n", file, line ); + printf(LOG_GL " %s\n", msg ); + + return true; +} + + +/** + * openGL error check + logging + * @ingroup renderGL + */ +inline bool glCheckError(const char* msg) +{ + GLenum err = glGetError(); + + if( err == GL_NO_ERROR ) + return false; + + const char* e = NULL; + + switch(err) + { + case GL_INVALID_ENUM: e = "invalid enum"; break; + case GL_INVALID_VALUE: e = "invalid value"; break; + case GL_INVALID_OPERATION: e = "invalid operation"; break; + case GL_STACK_OVERFLOW: e = "stack overflow"; break; + case GL_STACK_UNDERFLOW: e = "stack underflow"; break; + case GL_OUT_OF_MEMORY: e = "out of memory"; break; + #ifdef GL_TABLE_TOO_LARGE_EXT + case GL_TABLE_TOO_LARGE_EXT: e = "table too large"; break; + #endif + #ifdef GL_TEXTURE_TOO_LARGE_EXT + case GL_TEXTURE_TOO_LARGE_EXT: e = "texture too large"; break; + #endif + default: e = "unknown error"; + } + + printf(LOG_GL "%s (error %i - %s)\n", msg, (uint)err, e); + return true; +} + + + +#define GL_GPU_MEM_INFO_TOTAL_AVAILABLE_MEM_NVX 0x9048 +#define GL_GPU_MEM_INFO_CURRENT_AVAILABLE_MEM_NVX 0x9049 + + +/** + * glPrintFreeMem + * @ingroup renderGL + */ +inline void glPrintFreeMem() +{ + GLint total_mem_kb = 0; + GLint cur_avail_mem_kb = 0; + + glGetIntegerv(GL_GPU_MEM_INFO_TOTAL_AVAILABLE_MEM_NVX, &total_mem_kb); + glGetIntegerv(GL_GPU_MEM_INFO_CURRENT_AVAILABLE_MEM_NVX,&cur_avail_mem_kb); + + printf("[openGL] GPU memory free %i / %i kb\n", cur_avail_mem_kb, total_mem_kb); +} + + + +#endif + diff --git a/imageNet.cpp b/imageNet.cpp new file mode 100644 index 000000000..588fc8ca3 --- /dev/null +++ b/imageNet.cpp @@ -0,0 +1,284 @@ +/* + * inference-101 + */ + +#include "imageNet.h" +#include "cudaMappedMemory.h" +#include "cudaResize.h" + +#include + + +// stuff we know about the network and the caffe input/output blobs +static const int MAX_BATCH_SIZE = 1; + +const char* INPUT_BLOB_NAME = "data"; +const char* OUTPUT_BLOB_NAME = "prob"; + + + +imageNet::imageNet() +{ + mEngine = NULL; + mInfer = NULL; + mContext = NULL; + + mWidth = 0; + mHeight = 0; + mInputSize = 0; + mInputCPU = NULL; + mInputCUDA = NULL; + + mOutputSize = 0; + mOutputClasses = 0; + mOutputCPU = NULL; + mOutputCUDA = NULL; +} + + +imageNet::~imageNet() +{ + if( mEngine != NULL ) + { + mEngine->destroy(); + mEngine = NULL; + } + + if( mInfer != NULL ) + { + mInfer->destroy(); + mInfer = NULL; + } +} + + +imageNet* imageNet::Create( imageNet::NetworkType networkType ) +{ + imageNet* net = new imageNet(); + + if( !net ) + return NULL; + + if( !net->init(networkType) ) + { + printf("imageNet -- failed to initialize.\n"); + return NULL; + } + + return net; +} + + +// loadClassInfo +bool imageNet::loadClassInfo( const char* filename ) +{ + if( !filename ) + return false; + + FILE* f = fopen(filename, "r"); + + if( !f ) + { + printf("imageNet -- failed to open %s\n", filename); + return false; + } + + char str[512]; + + while( fgets(str, 512, f) != NULL ) + { + const int syn = 9; // length of synset prefix (in characters) + const int len = strlen(str); + + if( len < syn + 1 ) + continue; + + str[syn] = 0; + str[len-1] = 0; + + const std::string a = str; + const std::string b = (str + syn + 1); + + //printf("a=%s b=%s\n", a.c_str(), b.c_str()); + mClassSynset.push_back(a); + mClassDesc.push_back(b); + } + + fclose(f); + + printf("imageNet -- loaded %zu class info entries\n", mClassSynset.size()); + + if( mClassSynset.size() == 0 ) + return false; + + return true; +} + + +// init +bool imageNet::init( imageNet::NetworkType networkType ) +{ + const char* proto_file[] = { "alexnet.prototxt", "googlenet.prototxt" }; + const char* model_file[] = { "bvlc_alexnet.caffemodel", "bvlc_googlenet.caffemodel" }; + + /* + * load and parse googlenet network definition and model file + */ + std::stringstream gieModelStream; + gieModelStream.seekg(0, gieModelStream.beg); + mNetworkType = networkType; + + if( !caffeToGIEModel(proto_file[networkType], model_file[networkType], std::vector< std::string > { OUTPUT_BLOB_NAME }, MAX_BATCH_SIZE, gieModelStream) ) + { + printf("failed to load %s\n", GetNetworkName()); + return 0; + } + + printf(LOG_GIE "%s loaded\n", GetNetworkName()); + + + + /* + * create runtime inference engine execution context + */ + nvinfer1::IRuntime* infer = createInferRuntime(gLogger); + + if( !infer ) + { + printf(LOG_GIE "failed to create InferRuntime\n"); + return 0; + } + + nvinfer1::ICudaEngine* engine = infer->deserializeCudaEngine(gieModelStream); + + if( !engine ) + { + printf(LOG_GIE "failed to create CUDA engine\n"); + return 0; + } + + nvinfer1::IExecutionContext* context = engine->createExecutionContext(); + + if( !context ) + { + printf(LOG_GIE "failed to create execution context\n"); + return 0; + } + + printf(LOG_GIE "CUDA engine context initialized with %u bindings\n", engine->getNbBindings()); + + mInfer = infer; + mEngine = engine; + mContext = context; + + + /* + * determine dimensions of network bindings + */ + const int inputIndex = engine->getBindingIndex(INPUT_BLOB_NAME); + const int outputIndex = engine->getBindingIndex(OUTPUT_BLOB_NAME); + + printf(LOG_GIE "%s input binding index: %i\n", GetNetworkName(), inputIndex); + printf(LOG_GIE "%s output binding index: %i\n", GetNetworkName(), outputIndex); + + nvinfer1::Dims3 inputDims = engine->getBindingDimensions(inputIndex); + nvinfer1::Dims3 outputDims = engine->getBindingDimensions(outputIndex); + + size_t inputSize = inputDims.c * inputDims.h * inputDims.w * sizeof(float); + size_t outputSize = outputDims.c * outputDims.h * outputDims.w * sizeof(float); + + printf(LOG_GIE "%s input dims (c=%u h=%u w=%u) size=%zu\n", GetNetworkName(), inputDims.c, inputDims.h, inputDims.w, inputSize); + printf(LOG_GIE "%s output dims (c=%u h=%u w=%u) size=%zu\n", GetNetworkName(), outputDims.c, outputDims.h, outputDims.w, outputSize); + + + /* + * allocate memory to hold the input image + */ + if( !cudaAllocMapped((void**)&mInputCPU, (void**)&mInputCUDA, inputSize) ) + { + printf("failed to alloc CUDA mapped memory for imageNet input, %zu bytes\n", inputSize); + return false; + } + + mInputSize = inputSize; + mWidth = inputDims.w; + mHeight = inputDims.h; + + + /* + * allocate output memory to hold the image classes + */ + if( !cudaAllocMapped((void**)&mOutputCPU, (void**)&mOutputCUDA, outputSize) ) + { + printf("failed to alloc CUDA mapped memory for %u output classes\n", outputDims.c); + return false; + } + + mOutputSize = outputSize; + mOutputClasses = outputDims.c; + + if( !loadClassInfo("ilsvrc12_synset_words.txt") || mClassSynset.size() != mOutputClasses || mClassDesc.size() != mOutputClasses ) + { + printf("imageNet -- failed to load synset class descriptions (%zu / %zu of %u)\n", mClassSynset.size(), mClassDesc.size(), mOutputClasses); + return false; + } + + printf("%s initialized.\n", GetNetworkName()); + return true; +} + + +// from imageNet.cu +cudaError_t cudaPreImageNet( float4* input, size_t inputWidth, size_t inputHeight, float* output, size_t outputWidth, size_t outputHeight, const float3& mean_value ); + + +// Classify +int imageNet::Classify( float* rgba, uint32_t width, uint32_t height, float* confidence ) +{ + if( !rgba || width == 0 || height == 0 ) + { + printf("imageNet::Classify( 0x%p, %u, %u ) -> invalid parameters\n", rgba, width, height); + return -1; + } + + + // downsample and convert to band-sequential BGR + if( CUDA_FAILED(cudaPreImageNet((float4*)rgba, width, height, mInputCUDA, mWidth, mHeight, + make_float3(104.0069879317889f, 116.66876761696767f, 122.6789143406786f))) ) + { + printf("imageNet::Classify() -- cudaPreImageNet failed\n"); + return -1; + } + + // process with GIE + void* inferenceBuffers[] = { mInputCUDA, mOutputCUDA }; + + mContext->execute(1, inferenceBuffers); + + //CUDA(cudaDeviceSynchronize()); + + // determine the maximum class + int classIndex = -1; + float classMax = -1.0f; + + for( size_t n=0; n < mOutputClasses; n++ ) + { + const float value = mOutputCPU[n]; + + if( value >= 0.01f ) + printf("class %04zu - %f (%s)\n", n, value, mClassDesc[n].c_str()); + + if( value > classMax ) + { + classIndex = n; + classMax = value; + } + } + + if( confidence != NULL ) + *confidence = classMax; + + //printf("\nmaximum class: #%i (%f) (%s)\n", classIndex, classMax, mClassDesc[classIndex].c_str()); + return classIndex; +} + diff --git a/imageNet.cu b/imageNet.cu new file mode 100644 index 000000000..c63b7ad5f --- /dev/null +++ b/imageNet.cu @@ -0,0 +1,50 @@ + +#include "cudaUtility.h" + + + +// gpuPreImageNet +__global__ void gpuPreImageNet( float2 scale, float4* input, int iWidth, float* output, int oWidth, int oHeight, float3 mean_value ) +{ + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; + const int n = oWidth * oHeight; + + if( x >= oWidth || y >= oHeight ) + return; + + const int dx = ((float)x * scale.x); + const int dy = ((float)y * scale.y); + + const float4 px = input[ dy * iWidth + dx ]; + const float3 bgr = make_float3(px.z - mean_value.x, px.y - mean_value.y, px.x - mean_value.z); + + output[n * 0 + y * oWidth + x] = bgr.x; + output[n * 1 + y * oWidth + x] = bgr.y; + output[n * 2 + y * oWidth + x] = bgr.z; +} + + +// cudaPreImageNet +cudaError_t cudaPreImageNet( float4* input, size_t inputWidth, size_t inputHeight, + float* output, size_t outputWidth, size_t outputHeight, const float3& mean_value ) +{ + if( !input || !output ) + return cudaErrorInvalidDevicePointer; + + if( inputWidth == 0 || outputWidth == 0 || inputHeight == 0 || outputHeight == 0 ) + return cudaErrorInvalidValue; + + const float2 scale = make_float2( float(inputWidth) / float(outputWidth), + float(inputHeight) / float(outputHeight) ); + + // launch kernel + const dim3 blockDim(8, 8); + const dim3 gridDim(iDivUp(outputWidth,blockDim.x), iDivUp(outputHeight,blockDim.y)); + + gpuPreImageNet<<>>(scale, input, inputWidth, output, outputWidth, outputHeight, mean_value); + + return CUDA(cudaGetLastError()); +} + + diff --git a/imageNet.h b/imageNet.h new file mode 100644 index 000000000..debb18499 --- /dev/null +++ b/imageNet.h @@ -0,0 +1,100 @@ +/* + * inference-101 + */ + +#ifndef __IMAGE_NET_H__ +#define __IMAGE_NET_H__ + + +#include "caffeToGIE.h" + + +/** + * Image recognition with GoogLeNet/Alexnet, using GIE. + */ +class imageNet +{ +public: + /** + * Network choice enumeration. + */ + enum NetworkType + { + ALEXNET, + GOOGLENET + }; + + /** + * Load a new network instance + */ + static imageNet* Create( NetworkType networkType=GOOGLENET ); + + /** + * Destory + */ + ~imageNet(); + + /** + * Determine the maximum likelihood image class. + * @param rgba float4 input image in CUDA device memory. + * @param width width of the input image in pixels. + * @param height height of the input image in pixels. + * @param confidence optional pointer to float filled with confidence value. + * @returns Index of the maximum class, or -1 on error. + */ + int Classify( float* rgba, uint32_t width, uint32_t height, float* confidence=NULL ); + + /** + * Retrieve the number of image recognition classes (typically 1000) + */ + inline uint32_t GetNumClasses() const { return mOutputClasses; } + + /** + * Retrieve the description of a particular class. + */ + inline const char* GetClassDesc( uint32_t index ) const { return mClassDesc[index].c_str(); } + + /** + * Retrieve the class synset category of a particular class. + */ + inline const char* GetClassSynset( uint32_t index ) const { return mClassSynset[index].c_str(); } + + /** + * Retrieve the network type (alexnet or googlenet) + */ + inline NetworkType GetNetworkType() const { return mNetworkType; } + + /** + * Retrieve a string describing the network name. + */ + inline const char* GetNetworkName() const { return (mNetworkType == GOOGLENET ? "googlenet" : "alexnet"); } + +protected: + imageNet(); + + bool init( NetworkType networkType ); + bool loadClassInfo( const char* filename ); + + nvinfer1::IRuntime* mInfer; + nvinfer1::ICudaEngine* mEngine; + nvinfer1::IExecutionContext* mContext; + + uint32_t mWidth; + uint32_t mHeight; + uint32_t mInputSize; + float* mInputCPU; + float* mInputCUDA; + + uint32_t mOutputSize; + uint32_t mOutputClasses; + float* mOutputCPU; + float* mOutputCUDA; + + std::vector mClassSynset; // 1000 class ID's (ie n01580077, n04325704) + std::vector mClassDesc; + + NetworkType mNetworkType; +}; + + +#endif diff --git a/imagenet-camera/CMakeLists.txt b/imagenet-camera/CMakeLists.txt new file mode 100644 index 000000000..bd0f0954f --- /dev/null +++ b/imagenet-camera/CMakeLists.txt @@ -0,0 +1,6 @@ + +file(GLOB imagenetCameraSources *.cpp) +file(GLOB imagenetCameraIncludes *.h ) + +cuda_add_executable(imagenet-camera ${imagenetCameraSources}) +target_link_libraries(imagenet-camera nvcaffe_parser nvinfer jetson-inference) diff --git a/imagenet-camera/imagenet-camera.cpp b/imagenet-camera/imagenet-camera.cpp new file mode 100644 index 000000000..8c6d4c2eb --- /dev/null +++ b/imagenet-camera/imagenet-camera.cpp @@ -0,0 +1,198 @@ +/* + * inference-101 + */ + +#include "gstCamera.h" + +#include "glDisplay.h" +#include "glTexture.h" + +#include +#include +#include + +#include "cudaNormalize.h" +#include "imageNet.h" + + +bool signal_recieved = false; + +void sig_handler(int signo) +{ + if( signo == SIGINT ) + { + printf("received SIGINT\n"); + signal_recieved = true; + } +} + + +int main( int argc, char** argv ) +{ + printf("imagenet-camera\n args (%i): ", argc); + + for( int i=0; i < argc; i++ ) + printf("%i [%s] ", i, argv[i]); + + printf("\n"); + + + /* + * parse network type from CLI arguments + */ + imageNet::NetworkType networkType = imageNet::GOOGLENET; + + if( argc > 1 && strcmp(argv[1], "alexnet") == 0 ) + networkType = imageNet::ALEXNET; + + if( signal(SIGINT, sig_handler) == SIG_ERR ) + printf("\ncan't catch SIGINT\n"); + + + /* + * create the camera device + */ + gstCamera* camera = gstCamera::Create(); + + if( !camera ) + { + printf("\nimagenet-camera: failed to initialize video device\n"); + return 0; + } + + printf("\nimagenet-camera: successfully initialized video device\n"); + printf(" width: %u\n", camera->GetWidth()); + printf(" height: %u\n", camera->GetHeight()); + printf(" depth: %u (bpp)\n\n", camera->GetPixelDepth()); + + + /* + * create imageNet + */ + imageNet* net = imageNet::Create(networkType); + + if( !net ) + { + printf("imagenet-console: failed to initialize imageNet\n"); + return 0; + } + + + /* + * create openGL window + */ + glDisplay* display = glDisplay::Create(); + + if( !display ) + printf("\nimagenet-camera: failed to create openGL display\n"); + + glTexture* texture = glTexture::Create(camera->GetWidth(), camera->GetHeight(), GL_RGBA32F_ARB/*GL_RGBA8*/); + + if( !texture ) + printf("imagenet-camera: failed to create openGL texture\n"); + + + + /* + * start streaming + */ + if( !camera->Open() ) + { + printf("\nimagenet-camera: failed to open camera for streaming\n"); + return 0; + } + + printf("\nimagenet-camera: camera open for streaming\n"); + + + /* + * processing loop + */ + float confidence = 0.0f; + + while( !signal_recieved ) + { + void* imgCPU = NULL; + void* imgCUDA = NULL; + + // get the latest frame + if( !camera->Capture(&imgCPU, &imgCUDA, 1000) ) + printf("\nimagenet-camera: failed to capture frame\n"); + //else + // printf("imagenet-camera: recieved new frame CPU=0x%p GPU=0x%p\n", imgCPU, imgCUDA); + + // convert from YUV to RGBA + void* imgRGBA = NULL; + + if( !camera->ConvertRGBA(imgCUDA, &imgRGBA) ) + printf("imagenet-camera: failed to convert from NV12 to RGBA\n"); + + // classify image + const int img_class = net->Classify((float*)imgRGBA, camera->GetWidth(), camera->GetHeight(), &confidence); + + if( img_class >= 0 ) + { + printf("imagenet-camera: %2.5f%% class #%i (%s)\n", confidence * 100.0f, img_class, net->GetClassDesc(img_class)); + + if( display != NULL ) + { + char str[256]; + sprintf(str, "GIE build %x | %s | %04.1f FPS | %05.2f%% %s", NV_GIE_VERSION, net->GetNetworkName(), display->GetFPS(), confidence * 100.0f, net->GetClassDesc(img_class)); + display->SetTitle(str); + } + } + + + // update display + if( display != NULL ) + { + display->UserEvents(); + display->BeginRender(); + + if( texture != NULL ) + { + // rescale image pixel intensities for display + CUDA(cudaNormalizeRGBA((float4*)imgRGBA, make_float2(0.0f, 255.0f), + (float4*)imgRGBA, make_float2(0.0f, 1.0f), + camera->GetWidth(), camera->GetHeight())); + + // map from CUDA to openGL using GL interop + void* tex_map = texture->MapCUDA(); + + if( tex_map != NULL ) + { + cudaMemcpy(tex_map, imgRGBA, texture->GetSize(), cudaMemcpyDeviceToDevice); + texture->Unmap(); + } + + // draw the texture + texture->Render(100,100); + } + + display->EndRender(); + } + } + + printf("\nimagenet-camera: un-initializing video device\n"); + + + /* + * shutdown the camera device + */ + if( camera != NULL ) + { + delete camera; + camera = NULL; + } + + if( display != NULL ) + { + delete display; + display = NULL; + } + + printf("imagenet-camera: video device has been un-initialized.\n"); + printf("imagenet-camera: this concludes the test of the video device.\n"); + return 0; +} + diff --git a/imagenet-console/CMakeLists.txt b/imagenet-console/CMakeLists.txt new file mode 100644 index 000000000..3e5d5522b --- /dev/null +++ b/imagenet-console/CMakeLists.txt @@ -0,0 +1,6 @@ + +file(GLOB imagenetConsoleSources *.cpp) +file(GLOB imagenetConsoleIncludes *.h ) + +cuda_add_executable(imagenet-console ${imagenetConsoleSources}) +target_link_libraries(imagenet-console nvcaffe_parser nvinfer jetson-inference) diff --git a/imagenet-console/imagenet-console.cpp b/imagenet-console/imagenet-console.cpp new file mode 100644 index 000000000..ecea53d01 --- /dev/null +++ b/imagenet-console/imagenet-console.cpp @@ -0,0 +1,67 @@ +/* + * inference-101 + */ + +#include "imageNet.h" +#include "loadImage.h" + + + +// main entry point +int main( int argc, char** argv ) +{ + printf("imagenet-console\n args (%i): ", argc); + + for( int i=0; i < argc; i++ ) + printf("%i [%s] ", i, argv[i]); + + printf("\n\n"); + + + // retrieve filename argument + if( argc < 2 ) + { + printf("imagenet-console: input image filename required\n"); + return 0; + } + + const char* imgFilename = argv[1]; + + + // create imageNet + imageNet* net = imageNet::Create(); + + if( !net ) + { + printf("imagenet-console: failed to initialize imageNet\n"); + return 0; + } + + // load image from file on disk + float* imgCPU = NULL; + float* imgCUDA = NULL; + int imgWidth = 0; + int imgHeight = 0; + + if( !loadImageRGBA(imgFilename, (float4**)&imgCPU, (float4**)&imgCUDA, &imgWidth, &imgHeight) ) + { + printf("failed to load image '%s'\n", imgFilename); + return 0; + } + + float confidence = 0.0f; + + // classify image + const int img_class = net->Classify(imgCUDA, imgWidth, imgHeight, &confidence); + + if( img_class < 0 ) + printf("imagenet-console: failed to classify '%s' (result=%i)\n", imgFilename, img_class); + else + printf("imagenet-console: '%s' -> %2.5f%% class #%i (%s)\n", imgFilename, confidence * 100.0f, img_class, net->GetClassDesc(img_class)); + + + printf("\nshutting down...\n"); + CUDA(cudaFreeHost(imgCPU)); + delete net; + return 0; +} diff --git a/imagenet-console/imagenet-example.backup b/imagenet-console/imagenet-example.backup new file mode 100644 index 000000000..29213196d --- /dev/null +++ b/imagenet-console/imagenet-example.backup @@ -0,0 +1,154 @@ +/* + * inference-101 + */ + +#include "caffeToGIE.h" +#include "cudaMappedMemory.h" +#include "loadImage.h" + +#include + + +// stuff we know about the network and the caffe input/output blobs +static const int MAX_BATCH_SIZE = 1; + +const char* INPUT_BLOB_NAME = "data"; +const char* OUTPUT_BLOB_NAME = "prob"; + +const char* modelPath = "bvlc_googlenet.caffemodel"; //"bvlc_alexnet.caffemodel"; +const char* protoPath = "googlenet.prototxt"; //"alexnet.prototxt"; + + +// main entry point +int main( int argc, char** argv ) +{ + printf("imagenet-console\n args (%i): ", argc); + + for( int i=0; i < argc; i++ ) + printf("%i [%s] ", i, argv[i]); + + printf("\n\n"); + + + // parse the caffe model and the mean file + std::stringstream gieModelStream; + gieModelStream.seekg(0, gieModelStream.beg); + + if( !caffeToGIEModel(protoPath, modelPath, std::vector< std::string > { OUTPUT_BLOB_NAME }, + MAX_BATCH_SIZE, gieModelStream) ) + { + printf("failed to load %s %s\n", protoPath, modelPath); + return 0; + } + + printf(LOG_GIE "imagenet loaded\n"); + + + // create runtime inference engine + nvinfer1::IRuntime* infer = createInferRuntime(gLogger); + + if( !infer ) + { + printf(LOG_GIE "failed to create InferRuntime\n"); + return 0; + } + + nvinfer1::ICudaEngine* engine = infer->deserializeCudaEngine(gieModelStream); + + if( !engine ) + { + printf(LOG_GIE "failed to create CUDA engine\n"); + return 0; + } + + nvinfer1::IExecutionContext* context = engine->createExecutionContext(); + + if( !context ) + { + printf(LOG_GIE "failed to create execution context\n"); + return 0; + } + + printf(LOG_GIE "CUDA engine context initialized with %u bindings\n", engine->getNbBindings()); + + + // locate the input/output network bindings + const int inputIndex = engine->getBindingIndex(INPUT_BLOB_NAME); + const int outputIndex = engine->getBindingIndex(OUTPUT_BLOB_NAME); + + printf(LOG_GIE "imagenet input binding index: %i\n", inputIndex); + printf(LOG_GIE "imagenet output binding index: %i\n", outputIndex); + + const nvinfer1::Dims3 inputDims = engine->getBindingDimensions(inputIndex); + const nvinfer1::Dims3 outputDims = engine->getBindingDimensions(outputIndex); + + const size_t inputSize = inputDims.w * inputDims.h * inputDims.c * sizeof(float); + const size_t outputSize = outputDims.w * outputDims.h * outputDims.c * sizeof(float); + + printf(LOG_GIE "imagenet input dims (w=%u h=%u c=%u) size=%zu\n", inputDims.w, inputDims.h, inputDims.c, inputSize); + printf(LOG_GIE "imagenet output dims (w=%u h=%u c=%u) size=%zu\n", outputDims.w, outputDims.h, outputDims.c, outputSize); + + + // load image from file on disk + float3* imgCPU = NULL; + float3* imgCUDA = NULL; + int imgWidth = inputDims.w; + int imgHeight = inputDims.h; + + if( argc >= 2 ) + { + const char* imgFilename = argv[1]; + + if( !loadImageBGR(imgFilename, &imgCPU, &imgCUDA, &imgWidth, &imgHeight, make_float3(104.0069879317889f, 116.66876761696767f, 122.6789143406786f)) ) + { + printf("failed to load image '%s'\n", imgFilename); + return 0; + } + } + + + // allocate output memory to hold the image classes + float* outputCPU = NULL; + float* outputCUDA = NULL; + + if( !cudaAllocMapped((void**)&outputCPU, (void**)&outputCUDA, outputSize) ) + { + printf("failed to alloc CUDA mapped memory for %u output classes\n", outputDims.c); + return false; + } + + + // process with GIE + void* inferenceBuffers[] = { imgCUDA, outputCUDA }; + + context->execute(1, inferenceBuffers); + + CUDA(cudaDeviceSynchronize()); + + // determine the maximum class + int classIndex = -1; + float classMax = -1.0f; + + for( size_t n=0; n < outputDims.c; n++ ) + { + const float value = outputCPU[n]; + + if( value >= 0.01f ) + printf("class %04zu - %f\n", n, value); + + if( value > classMax ) + { + classIndex = n; + classMax = value; + } + } + + printf("\nmaximum class: %i (%f)\n", classIndex, classMax); + + + + printf("\nshutting down...\n"); + engine->destroy(); + infer->destroy(); + return 0; +} diff --git a/loadImage.cpp b/loadImage.cpp new file mode 100644 index 000000000..dd22b2a97 --- /dev/null +++ b/loadImage.cpp @@ -0,0 +1,182 @@ +/* + * inference-101 + */ + +#include "loadImage.h" +#include "cudaMappedMemory.h" + +#include + + +// loadImageRGBA +bool loadImageRGBA( const char* filename, float4** cpu, float4** gpu, int* width, int* height ) +{ + if( !filename || !cpu || !gpu || !width || !height ) + { + printf("loadImageRGB - invalid parameter\n"); + return false; + } + + // load original image + QImage qImg; + + if( !qImg.load(filename) ) + { + printf("failed to load image %s\n", filename); + return false; + } + + if( *width != 0 && *height != 0 ) + qImg = qImg.scaled(*width, *height, Qt::IgnoreAspectRatio); + + const uint32_t imgWidth = qImg.width(); + const uint32_t imgHeight = qImg.height(); + const uint32_t imgPixels = imgWidth * imgHeight; + const size_t imgSize = imgWidth * imgHeight * sizeof(float) * 4; + + printf("loaded image %s (%u x %u) %zu bytes\n", filename, imgWidth, imgHeight, imgSize); + + // allocate buffer for the image + if( !cudaAllocMapped((void**)cpu, (void**)gpu, imgSize) ) + { + printf(LOG_CUDA "failed to allocated %zu bytes for image %s\n", imgSize, filename); + return false; + } + + float4* cpuPtr = *cpu; + + for( uint32_t y=0; y < imgHeight; y++ ) + { + for( uint32_t x=0; x < imgWidth; x++ ) + { + const QRgb rgb = qImg.pixel(x,y); + const float4 px = make_float4(float(qRed(rgb)), + float(qGreen(rgb)), + float(qBlue(rgb)), 1.0f); + + cpuPtr[y*imgWidth+x] = px; + } + } + + *width = imgWidth; + *height = imgHeight; + return true; +} + + +// loadImageRGB +bool loadImageRGB( const char* filename, float3** cpu, float3** gpu, int* width, int* height, const float3& mean ) +{ + if( !filename || !cpu || !gpu || !width || !height ) + { + printf("loadImageRGB - invalid parameter\n"); + return false; + } + + // load original image + QImage qImg; + + if( !qImg.load(filename) ) + { + printf("failed to load image %s\n", filename); + return false; + } + + if( *width != 0 && *height != 0 ) + qImg = qImg.scaled(*width, *height, Qt::IgnoreAspectRatio); + + const uint32_t imgWidth = qImg.width(); + const uint32_t imgHeight = qImg.height(); + const uint32_t imgPixels = imgWidth * imgHeight; + const size_t imgSize = imgWidth * imgHeight * sizeof(float) * 3; + + printf("loaded image %s (%u x %u) %zu bytes\n", filename, imgWidth, imgHeight, imgSize); + + // allocate buffer for the image + if( !cudaAllocMapped((void**)cpu, (void**)gpu, imgSize) ) + { + printf(LOG_CUDA "failed to allocated %zu bytes for image %s\n", imgSize, filename); + return false; + } + + float* cpuPtr = (float*)*cpu; + + for( uint32_t y=0; y < imgHeight; y++ ) + { + for( uint32_t x=0; x < imgWidth; x++ ) + { + const QRgb rgb = qImg.pixel(x,y); + const float mul = 1.0f; //1.0f / 255.0f; + const float3 px = make_float3((float(qRed(rgb)) - mean.x) * mul, + (float(qGreen(rgb)) - mean.y) * mul, + (float(qBlue(rgb)) - mean.z) * mul ); + + // note: caffe/GIE is band-sequential (as opposed to the typical Band Interleaved by Pixel) + cpuPtr[imgPixels * 0 + y * imgWidth + x] = px.x; + cpuPtr[imgPixels * 1 + y * imgWidth + x] = px.y; + cpuPtr[imgPixels * 2 + y * imgWidth + x] = px.z; + } + } + + *width = imgWidth; + *height = imgHeight; + return true; +} + + +// loadImageBGR +bool loadImageBGR( const char* filename, float3** cpu, float3** gpu, int* width, int* height, const float3& mean ) +{ + if( !filename || !cpu || !gpu || !width || !height ) + { + printf("loadImageRGB - invalid parameter\n"); + return false; + } + + // load original image + QImage qImg; + + if( !qImg.load(filename) ) + { + printf("failed to load image %s\n", filename); + return false; + } + + if( *width != 0 && *height != 0 ) + qImg = qImg.scaled(*width, *height, Qt::IgnoreAspectRatio); + + const uint32_t imgWidth = qImg.width(); + const uint32_t imgHeight = qImg.height(); + const uint32_t imgPixels = imgWidth * imgHeight; + const size_t imgSize = imgWidth * imgHeight * sizeof(float) * 3; + + printf("loaded image %s (%u x %u) %zu bytes\n", filename, imgWidth, imgHeight, imgSize); + + // allocate buffer for the image + if( !cudaAllocMapped((void**)cpu, (void**)gpu, imgSize) ) + { + printf(LOG_CUDA "failed to allocated %zu bytes for image %s\n", imgSize, filename); + return false; + } + + float* cpuPtr = (float*)*cpu; + + for( uint32_t y=0; y < imgHeight; y++ ) + { + for( uint32_t x=0; x < imgWidth; x++ ) + { + const QRgb rgb = qImg.pixel(x,y); + const float mul = 1.0f; //1.0f / 255.0f; + const float3 px = make_float3((float(qBlue(rgb)) - mean.x) * mul, + (float(qGreen(rgb)) - mean.y) * mul, + (float(qRed(rgb)) - mean.z) * mul ); + + // note: caffe/GIE is band-sequential (as opposed to the typical Band Interleaved by Pixel) + cpuPtr[imgPixels * 0 + y * imgWidth + x] = px.x; + cpuPtr[imgPixels * 1 + y * imgWidth + x] = px.y; + cpuPtr[imgPixels * 2 + y * imgWidth + x] = px.z; + } + } + + return true; +} \ No newline at end of file diff --git a/loadImage.h b/loadImage.h new file mode 100644 index 000000000..367fa684e --- /dev/null +++ b/loadImage.h @@ -0,0 +1,54 @@ +/* + * inference-101 + */ + +#ifndef __IMAGE_LOADER_H_ +#define __IMAGE_LOADER_H_ + + +#include "cudaUtility.h" + + +/** + * Load a color image from disk into CUDA memory with alpha. + * This function loads the image into shared CPU/GPU memory, using the functions from cudaMappedMemory.h + * + * @param filename Path to the image file on disk. + * @param cpu Pointer to CPU buffer allocated containing the image. + * @param gpu Pointer to CUDA device buffer residing on GPU containing image. + * @param width Variable containing width in pixels of the image. + * @param height Variable containing height in pixels of the image. + */ +bool loadImageRGBA( const char* filename, float4** cpu, float4** gpu, int* width, int* height ); + + +/** + * Load a color image from disk into CUDA memory. + * This function loads the image into shared CPU/GPU memory, using the functions from cudaMappedMemory.h + * + * @param filename Path to the image file on disk. + * @param cpu Pointer to CPU buffer allocated containing the image. + * @param gpu Pointer to CUDA device buffer residing on GPU containing image. + * @param width Variable containing width in pixels of the image. + * @param height Variable containing height in pixels of the image. + */ +bool loadImageRGB( const char* filename, float3** cpu, float3** gpu, int* width, int* height, const float3& mean=make_float3(0,0,0) ); + + +/** + * Load a color image from disk into CUDA memory. + * This function loads the image into shared CPU/GPU memory, using the functions from cudaMappedMemory.h + * + * @param filename Path to the image file on disk. + * @param cpu Pointer to CPU buffer allocated containing the image. + * @param gpu Pointer to CUDA device buffer residing on GPU containing image. + * @param width Variable containing width in pixels of the image. + * @param height Variable containing height in pixels of the image. + */ +bool loadImageBGR( const char* filename, float3** cpu, float3** gpu, int* width, int* height, const float3& mean=make_float3(0,0,0) ); + + + + + +#endif diff --git a/logGIE.h b/logGIE.h new file mode 100644 index 000000000..7d9679146 --- /dev/null +++ b/logGIE.h @@ -0,0 +1,29 @@ +/* + * inference-101 + */ + +#ifndef __GIE_LOGGER_H +#define __GIE_LOGGER_H + +#include "Infer.h" +#include + + +#define LOG_GIE "[GIE] " + + +/** + * Logger for GIE info/warning/errors + */ +class Logger : public nvinfer1::ILogger +{ + void log( Severity severity, const char* msg ) override + { + if( severity != Severity::kINFO ) + printf(LOG_GIE "%s\n", msg); + } +} gLogger; + + + +#endif \ No newline at end of file