/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ const static char *const sSDKsample = "HSOpticalFlow"; // CPU-GPU discrepancy threshold for self-test const float THRESHOLD = 0.05f; #include #include "common.h" #include "flowGold.h" #include "flowCUDA.h" #include /////////////////////////////////////////////////////////////////////////////// /// \brief save optical flow in format described on vision.middlebury.edu/flow /// \param[in] name output file name /// \param[in] w optical flow field width /// \param[in] h optical flow field height /// \param[in] s optical flow field row stride /// \param[in] u horizontal displacement /// \param[in] v vertical displacement /////////////////////////////////////////////////////////////////////////////// void WriteFloFile(const char *name, int w, int h, int s, const float *u, const float *v) { FILE *stream; stream = fopen(name, "wb"); if (stream == 0) { printf("Could not save flow to \"%s\"\n", name); return; } float data = 202021.25f; fwrite(&data, sizeof(float), 1, stream); fwrite(&w, sizeof(w), 1, stream); fwrite(&h, sizeof(h), 1, stream); for (int i = 0; i < h; ++i) { for (int j = 0; j < w; ++j) { const int pos = j + i * s; fwrite(u + pos, sizeof(float), 1, stream); fwrite(v + pos, sizeof(float), 1, stream); } } fclose(stream); } /////////////////////////////////////////////////////////////////////////////// /// \brief /// load 4-channel unsigned byte image /// and convert it to single channel FP32 image /// \param[out] img_data pointer to raw image data /// \param[out] img_w image width /// \param[out] img_h image height /// \param[out] img_s image row stride /// \param[in] name image file name /// \param[in] exePath executable file path /// \return true if image is successfully loaded or false otherwise /////////////////////////////////////////////////////////////////////////////// bool LoadImageAsFP32(float *&img_data, int &img_w, int &img_h, int &img_s, const char *name, const char *exePath) { printf("Loading \"%s\" ...\n", name); char *name_ = sdkFindFilePath(name, exePath); if (!name_) { printf("File not found\n"); return false; } unsigned char *data = 0; unsigned int w = 0, h = 0; bool result = sdkLoadPPM4ub(name_, &data, &w, &h); if (result == false) { printf("Invalid file format\n"); return false; } img_w = w; img_h = h; img_s = iAlignUp(img_w); img_data = new float[img_s * h]; // source is 4 channel image const int widthStep = 4 * img_w; for (int i = 0; i < img_h; ++i) { for (int j = 0; j < img_w; ++j) { img_data[j + i * img_s] = ((float)data[j * 4 + i * widthStep]) / 255.0f; } } return true; } /////////////////////////////////////////////////////////////////////////////// /// \brief compare given flow field with gold (L1 norm) /// \param[in] width optical flow field width /// \param[in] height optical flow field height /// \param[in] stride optical flow field row stride /// \param[in] h_uGold horizontal displacement, gold /// \param[in] h_vGold vertical displacement, gold /// \param[in] h_u horizontal displacement /// \param[in] h_v vertical displacement /// \return true if discrepancy is lower than a given threshold /////////////////////////////////////////////////////////////////////////////// bool CompareWithGold(int width, int height, int stride, const float *h_uGold, const float *h_vGold, const float *h_u, const float *h_v) { float error = 0.0f; for (int i = 0; i < height; ++i) { for (int j = 0; j < width; ++j) { const int pos = j + i * stride; error += fabsf(h_u[pos] - h_uGold[pos]) + fabsf(h_v[pos] - h_vGold[pos]); } } error /= (float)(width * height); printf("L1 error : %.6f\n", error); return (error < THRESHOLD); } /////////////////////////////////////////////////////////////////////////////// /// application entry point /////////////////////////////////////////////////////////////////////////////// int main(int argc, char **argv) { // welcome message printf("%s Starting...\n\n", sSDKsample); // pick GPU findCudaDevice(argc, (const char **)argv); // find images const char *const sourceFrameName = "frame10.ppm"; const char *const targetFrameName = "frame11.ppm"; // image dimensions int width; int height; // row access stride int stride; // flow is computed from source image to target image float *h_source; // source image, host memory float *h_target; // target image, host memory // load image from file if (!LoadImageAsFP32(h_source, width, height, stride, sourceFrameName, argv[0])) { exit(EXIT_FAILURE); } if (!LoadImageAsFP32(h_target, width, height, stride, targetFrameName, argv[0])) { exit(EXIT_FAILURE); } // allocate host memory for CPU results float *h_uGold = new float[stride * height]; float *h_vGold = new float[stride * height]; // allocate host memory for GPU results float *h_u = new float[stride * height]; float *h_v = new float[stride * height]; // smoothness // if image brightness is not within [0,1] // this paramter should be scaled appropriately const float alpha = 0.2f; // number of pyramid levels const int nLevels = 5; // number of solver iterations on each level const int nSolverIters = 500; // number of warping iterations const int nWarpIters = 3; ComputeFlowGold(h_source, h_target, width, height, stride, alpha, nLevels, nWarpIters, nSolverIters, h_uGold, h_vGold); ComputeFlowCUDA(h_source, h_target, width, height, stride, alpha, nLevels, nWarpIters, nSolverIters, h_u, h_v); // compare results (L1 norm) bool status = CompareWithGold(width, height, stride, h_uGold, h_vGold, h_u, h_v); WriteFloFile("FlowGPU.flo", width, height, stride, h_u, h_v); WriteFloFile("FlowCPU.flo", width, height, stride, h_uGold, h_vGold); // free resources delete[] h_uGold; delete[] h_vGold; delete[] h_u; delete[] h_v; delete[] h_source; delete[] h_target; // report self-test status exit(status ? EXIT_SUCCESS : EXIT_FAILURE); }