/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "common.h" #include "flowGold.h" /////////////////////////////////////////////////////////////////////////////// /// \brief host texture fetch /// /// read from arbitrary position within image using bilinear interpolation /// out of range coords are mirrored /// \param[in] t texture raw data /// \param[in] w texture width /// \param[in] h texture height /// \param[in] s texture stride /// \param[in] x x coord of the point to fetch value at /// \param[in] y y coord of the point to fetch value at /// \return fetched value /////////////////////////////////////////////////////////////////////////////// inline float Tex2D(const float *t, int w, int h, int s, float x, float y) { // integer parts in floating point format float intPartX, intPartY; // get fractional parts of coordinates float dx = fabsf(modff(x, &intPartX)); float dy = fabsf(modff(y, &intPartY)); // assume pixels are squares // one of the corners int ix0 = (int)intPartX; int iy0 = (int)intPartY; // mirror out-of-range position if (ix0 < 0) ix0 = abs(ix0 + 1); if (iy0 < 0) iy0 = abs(iy0 + 1); if (ix0 >= w) ix0 = w * 2 - ix0 - 1; if (iy0 >= h) iy0 = h * 2 - iy0 - 1; // corner which is opposite to (ix0, iy0) int ix1 = ix0 + 1; int iy1 = iy0 + 1; if (ix1 >= w) ix1 = w * 2 - ix1 - 1; if (iy1 >= h) iy1 = h * 2 - iy1 - 1; float res = t[ix0 + iy0 * s] * (1.0f - dx) * (1.0f - dy); res += t[ix1 + iy0 * s] * dx * (1.0f - dy); res += t[ix0 + iy1 * s] * (1.0f - dx) * dy; res += t[ix1 + iy1 * s] * dx * dy; return res; } /////////////////////////////////////////////////////////////////////////////// /// \brief host texture fetch /// /// read specific texel value /// out of range coords are mirrored /// \param[in] t texture raw data /// \param[in] w texture width /// \param[in] h texture height /// \param[in] s texture stride /// \param[in] x x coord of the point to fetch value at /// \param[in] y y coord of the point to fetch value at /// \return fetched value /////////////////////////////////////////////////////////////////////////////// inline float Tex2Di(const float *src, int w, int h, int s, int x, int y) { if (x < 0) x = abs(x + 1); if (y < 0) y = abs(y + 1); if (x >= w) x = w * 2 - x - 1; if (y >= h) y = h * 2 - y - 1; return src[x + y * s]; } /////////////////////////////////////////////////////////////////////////////// /// \brief resize image /// \param[in] src image to downscale /// \param[in] width image width /// \param[in] height image height /// \param[in] stride image stride /// \param[in] newWidth image new width /// \param[in] newHeight image new height /// \param[in] newStride image new stride /// \param[out] out downscaled image data /////////////////////////////////////////////////////////////////////////////// static void Downscale(const float *src, int width, int height, int stride, int newWidth, int newHeight, int newStride, float *out) { for (int i = 0; i < newHeight; ++i) { for (int j = 0; j < newWidth; ++j) { const int srcX = j * 2; const int srcY = i * 2; // average 4 neighbouring pixels float sum; sum = Tex2Di(src, width, height, stride, srcX + 0, srcY + 0); sum += Tex2Di(src, width, height, stride, srcX + 0, srcY + 1); sum += Tex2Di(src, width, height, stride, srcX + 1, srcY + 0); sum += Tex2Di(src, width, height, stride, srcX + 1, srcY + 1); // normalize sum *= 0.25f; out[j + i * newStride] = sum; } } } /////////////////////////////////////////////////////////////////////////////// /// \brief upscale one component of a displacement field /// \param[in] src field component to upscale /// \param[in] width field current width /// \param[in] height field current height /// \param[in] stride field current stride /// \param[in] newWidth field new width /// \param[in] newHeight field new height /// \param[in] newStride field new stride /// \param[in] scale value scale factor (multiplier) /// \param[out] out upscaled field component /////////////////////////////////////////////////////////////////////////////// static void Upscale(const float *src, int width, int height, int stride, int newWidth, int newHeight, int newStride, float scale, float *out) { for (int i = 0; i < newHeight; ++i) { for (int j = 0; j < newWidth; ++j) { // position within smaller image float x = ((float)j - 0.5f) * 0.5f; float y = ((float)i - 0.5f) * 0.5f; out[j + i * newStride] = Tex2D(src, width, height, stride, x, y) * scale; } } } /////////////////////////////////////////////////////////////////////////////// /// \brief warp image with provided vector field /// /// For each output pixel there is a vector which tells which pixel /// from a source image should be mapped to this particular output /// pixel. /// It is assumed that images and the vector field have the same stride and /// resolution. /// \param[in] src source image /// \param[in] w width /// \param[in] h height /// \param[in] s stride /// \param[in] u horizontal displacement /// \param[in] v vertical displacement /// \param[out] out warped image /////////////////////////////////////////////////////////////////////////////// static void WarpImage(const float *src, int w, int h, int s, const float *u, const float *v, float *out) { for (int i = 0; i < h; ++i) { for (int j = 0; j < w; ++j) { const int pos = j + i * s; // warped coords float x = (float)j + u[pos]; float y = (float)i + v[pos]; out[pos] = Tex2D(src, w, h, s, x, y); } } } /////////////////////////////////////////////////////////////////////////////// /// \brief computes image derivatives for a pair of images /// \param[in] I0 source image /// \param[in] I1 tracked image /// \param[in] w images width /// \param[in] h images height /// \param[in] s images stride /// \param[out] Ix x derivative /// \param[out] Iy y derivative /// \param[out] Iz temporal derivative /////////////////////////////////////////////////////////////////////////////// static void ComputeDerivatives(const float *I0, const float *I1, int w, int h, int s, float *Ix, float *Iy, float *Iz) { for (int i = 0; i < h; ++i) { for (int j = 0; j < w; ++j) { const int pos = j + i * s; float t0, t1; // derivative filter is (1, -8, 0, 8, -1)/12 // x derivative t0 = Tex2Di(I0, w, h, s, j - 2, i); t0 -= Tex2Di(I0, w, h, s, j - 1, i) * 8.0f; t0 += Tex2Di(I0, w, h, s, j + 1, i) * 8.0f; t0 -= Tex2Di(I0, w, h, s, j + 2, i); t0 /= 12.0f; t1 = Tex2Di(I1, w, h, s, j - 2, i); t1 -= Tex2Di(I1, w, h, s, j - 1, i) * 8.0f; t1 += Tex2Di(I1, w, h, s, j + 1, i) * 8.0f; t1 -= Tex2Di(I1, w, h, s, j + 2, i); t1 /= 12.0f; // spatial derivatives are averaged Ix[pos] = (t0 + t1) * 0.5f; // t derivative Iz[pos] = I1[pos] - I0[pos]; // y derivative t0 = Tex2Di(I0, w, h, s, j, i - 2); t0 -= Tex2Di(I0, w, h, s, j, i - 1) * 8.0f; t0 += Tex2Di(I0, w, h, s, j, i + 1) * 8.0f; t0 -= Tex2Di(I0, w, h, s, j, i + 2); t0 /= 12.0f; t1 = Tex2Di(I1, w, h, s, j, i - 2); t1 -= Tex2Di(I1, w, h, s, j, i - 1) * 8.0f; t1 += Tex2Di(I1, w, h, s, j, i + 1) * 8.0f; t1 -= Tex2Di(I1, w, h, s, j, i + 2); t1 /= 12.0f; Iy[pos] = (t0 + t1) * 0.5f; } } } /////////////////////////////////////////////////////////////////////////////// /// \brief one iteration of classical Horn-Schunck method /// /// It is one iteration of Jacobi method for a corresponding linear system /// \param[in] du0 current horizontal displacement approximation /// \param[in] dv0 current vertical displacement approximation /// \param[in] Ix image x derivative /// \param[in] Iy image y derivative /// \param[in] Iz temporal derivative /// \param[in] w width /// \param[in] h height /// \param[in] s stride /// \param[in] alpha degree of smoothness /// \param[out] du1 new horizontal displacement approximation /// \param[out] dv1 new vertical displacement approximation /////////////////////////////////////////////////////////////////////////////// static void SolveForUpdate(const float *du0, const float *dv0, const float *Ix, const float *Iy, const float *Iz, int w, int h, int s, float alpha, float *du1, float *dv1) { for (int i = 0; i < h; ++i) { for (int j = 0; j < w; ++j) { const int pos = j + i * s; int left, right, up, down; // handle borders if (j != 0) left = pos - 1; else left = pos; if (j != w - 1) right = pos + 1; else right = pos; if (i != 0) down = pos - s; else down = pos; if (i != h - 1) up = pos + s; else up = pos; float sumU = (du0[left] + du0[right] + du0[up] + du0[down]) * 0.25f; float sumV = (dv0[left] + dv0[right] + dv0[up] + dv0[down]) * 0.25f; float frac = (Ix[pos] * sumU + Iy[pos] * sumV + Iz[pos]) / (Ix[pos] * Ix[pos] + Iy[pos] * Iy[pos] + alpha); du1[pos] = sumU - Ix[pos] * frac; dv1[pos] = sumV - Iy[pos] * frac; } } } /////////////////////////////////////////////////////////////////////////////// /// \brief method logic /// /// handles memory allocation and control flow /// \param[in] I0 source image /// \param[in] I1 tracked image /// \param[in] width images width /// \param[in] height images height /// \param[in] stride images stride /// \param[in] alpha degree of displacement field smoothness /// \param[in] nLevels number of levels in a pyramid /// \param[in] nWarpIters number of warping iterations per pyramid level /// \param[in] nSolverIters number of solver iterations (Jacobi iterations) /// \param[out] u horizontal displacement /// \param[out] v vertical displacement /////////////////////////////////////////////////////////////////////////////// void ComputeFlowGold(const float *I0, const float *I1, int width, int height, int stride, float alpha, int nLevels, int nWarpIters, int nSolverIters, float *u, float *v) { printf("Computing optical flow on CPU...\n"); float *u0 = u; float *v0 = v; const float **pI0 = new const float *[nLevels]; const float **pI1 = new const float *[nLevels]; int *pW = new int[nLevels]; int *pH = new int[nLevels]; int *pS = new int[nLevels]; const int pixelCountAligned = height * stride; float *tmp = new float[pixelCountAligned]; float *du0 = new float[pixelCountAligned]; float *dv0 = new float[pixelCountAligned]; float *du1 = new float[pixelCountAligned]; float *dv1 = new float[pixelCountAligned]; float *Ix = new float[pixelCountAligned]; float *Iy = new float[pixelCountAligned]; float *Iz = new float[pixelCountAligned]; float *nu = new float[pixelCountAligned]; float *nv = new float[pixelCountAligned]; // prepare pyramid int currentLevel = nLevels - 1; pI0[currentLevel] = I0; pI1[currentLevel] = I1; pW[currentLevel] = width; pH[currentLevel] = height; pS[currentLevel] = stride; for (; currentLevel > 0; --currentLevel) { int nw = pW[currentLevel] / 2; int nh = pH[currentLevel] / 2; int ns = iAlignUp(nw); pI0[currentLevel - 1] = new float[ns * nh]; pI1[currentLevel - 1] = new float[ns * nh]; Downscale(pI0[currentLevel], pW[currentLevel], pH[currentLevel], pS[currentLevel], nw, nh, ns, (float *)pI0[currentLevel - 1]); Downscale(pI1[currentLevel], pW[currentLevel], pH[currentLevel], pS[currentLevel], nw, nh, ns, (float *)pI1[currentLevel - 1]); pW[currentLevel - 1] = nw; pH[currentLevel - 1] = nh; pS[currentLevel - 1] = ns; } // initial approximation memset(u, 0, stride * height * sizeof(float)); memset(v, 0, stride * height * sizeof(float)); // compute flow for (; currentLevel < nLevels; ++currentLevel) { for (int warpIter = 0; warpIter < nWarpIters; ++warpIter) { memset(du0, 0, pixelCountAligned * sizeof(float)); memset(dv0, 0, pixelCountAligned * sizeof(float)); memset(du1, 0, pixelCountAligned * sizeof(float)); memset(dv1, 0, pixelCountAligned * sizeof(float)); WarpImage(pI1[currentLevel], pW[currentLevel], pH[currentLevel], pS[currentLevel], u, v, tmp); // on current level we compute optical flow // between frame 0 and warped frame 1 ComputeDerivatives(pI0[currentLevel], tmp, pW[currentLevel], pH[currentLevel], pS[currentLevel], Ix, Iy, Iz); for (int iter = 0; iter < nSolverIters; ++iter) { SolveForUpdate(du0, dv0, Ix, Iy, Iz, pW[currentLevel], pH[currentLevel], pS[currentLevel], alpha, du1, dv1); Swap(du0, du1); Swap(dv0, dv1); } // update u, v for (int i = 0; i < pH[currentLevel] * pS[currentLevel]; ++i) { u[i] += du0[i]; v[i] += dv0[i]; } } // end for (int warpIter = 0; warpIter < nWarpIters; ++warpIter) if (currentLevel != nLevels - 1) { // prolongate solution float scaleX = (float)pW[currentLevel + 1] / (float)pW[currentLevel]; Upscale(u, pW[currentLevel], pH[currentLevel], pS[currentLevel], pW[currentLevel + 1], pH[currentLevel + 1], pS[currentLevel + 1], scaleX, nu); float scaleY = (float)pH[currentLevel + 1] / (float)pH[currentLevel]; Upscale(v, pW[currentLevel], pH[currentLevel], pS[currentLevel], pW[currentLevel + 1], pH[currentLevel + 1], pS[currentLevel + 1], scaleY, nv); Swap(u, nu); Swap(v, nv); } } // end for (; currentLevel < nLevels; ++currentLevel) if (u != u0) { // solution is not in the specified array // copy memcpy(u0, u, pixelCountAligned * sizeof(float)); memcpy(v0, v, pixelCountAligned * sizeof(float)); Swap(u, nu); Swap(v, nv); } // cleanup // last level is not being freed here // because it refers to input images for (int i = 0; i < nLevels - 1; ++i) { delete[] pI0[i]; delete[] pI1[i]; } delete[] pI0; delete[] pI1; delete[] pW; delete[] pH; delete[] pS; delete[] tmp; delete[] du0; delete[] dv0; delete[] du1; delete[] dv1; delete[] Ix; delete[] Iy; delete[] Iz; delete[] nu; delete[] nv; }