cuda-samples/Samples/HSOpticalFlow/flowGold.cpp
2021-10-21 16:34:49 +05:30

463 lines
16 KiB
C++

/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "common.h"
#include "flowGold.h"
///////////////////////////////////////////////////////////////////////////////
/// \brief host texture fetch
///
/// read from arbitrary position within image using bilinear interpolation
/// out of range coords are mirrored
/// \param[in] t texture raw data
/// \param[in] w texture width
/// \param[in] h texture height
/// \param[in] s texture stride
/// \param[in] x x coord of the point to fetch value at
/// \param[in] y y coord of the point to fetch value at
/// \return fetched value
///////////////////////////////////////////////////////////////////////////////
inline float Tex2D(const float *t, int w, int h, int s, float x, float y) {
// integer parts in floating point format
float intPartX, intPartY;
// get fractional parts of coordinates
float dx = fabsf(modff(x, &intPartX));
float dy = fabsf(modff(y, &intPartY));
// assume pixels are squares
// one of the corners
int ix0 = (int)intPartX;
int iy0 = (int)intPartY;
// mirror out-of-range position
if (ix0 < 0) ix0 = abs(ix0 + 1);
if (iy0 < 0) iy0 = abs(iy0 + 1);
if (ix0 >= w) ix0 = w * 2 - ix0 - 1;
if (iy0 >= h) iy0 = h * 2 - iy0 - 1;
// corner which is opposite to (ix0, iy0)
int ix1 = ix0 + 1;
int iy1 = iy0 + 1;
if (ix1 >= w) ix1 = w * 2 - ix1 - 1;
if (iy1 >= h) iy1 = h * 2 - iy1 - 1;
float res = t[ix0 + iy0 * s] * (1.0f - dx) * (1.0f - dy);
res += t[ix1 + iy0 * s] * dx * (1.0f - dy);
res += t[ix0 + iy1 * s] * (1.0f - dx) * dy;
res += t[ix1 + iy1 * s] * dx * dy;
return res;
}
///////////////////////////////////////////////////////////////////////////////
/// \brief host texture fetch
///
/// read specific texel value
/// out of range coords are mirrored
/// \param[in] t texture raw data
/// \param[in] w texture width
/// \param[in] h texture height
/// \param[in] s texture stride
/// \param[in] x x coord of the point to fetch value at
/// \param[in] y y coord of the point to fetch value at
/// \return fetched value
///////////////////////////////////////////////////////////////////////////////
inline float Tex2Di(const float *src, int w, int h, int s, int x, int y) {
if (x < 0) x = abs(x + 1);
if (y < 0) y = abs(y + 1);
if (x >= w) x = w * 2 - x - 1;
if (y >= h) y = h * 2 - y - 1;
return src[x + y * s];
}
///////////////////////////////////////////////////////////////////////////////
/// \brief resize image
/// \param[in] src image to downscale
/// \param[in] width image width
/// \param[in] height image height
/// \param[in] stride image stride
/// \param[in] newWidth image new width
/// \param[in] newHeight image new height
/// \param[in] newStride image new stride
/// \param[out] out downscaled image data
///////////////////////////////////////////////////////////////////////////////
static void Downscale(const float *src, int width, int height, int stride,
int newWidth, int newHeight, int newStride, float *out) {
for (int i = 0; i < newHeight; ++i) {
for (int j = 0; j < newWidth; ++j) {
const int srcX = j * 2;
const int srcY = i * 2;
// average 4 neighbouring pixels
float sum;
sum = Tex2Di(src, width, height, stride, srcX + 0, srcY + 0);
sum += Tex2Di(src, width, height, stride, srcX + 0, srcY + 1);
sum += Tex2Di(src, width, height, stride, srcX + 1, srcY + 0);
sum += Tex2Di(src, width, height, stride, srcX + 1, srcY + 1);
// normalize
sum *= 0.25f;
out[j + i * newStride] = sum;
}
}
}
///////////////////////////////////////////////////////////////////////////////
/// \brief upscale one component of a displacement field
/// \param[in] src field component to upscale
/// \param[in] width field current width
/// \param[in] height field current height
/// \param[in] stride field current stride
/// \param[in] newWidth field new width
/// \param[in] newHeight field new height
/// \param[in] newStride field new stride
/// \param[in] scale value scale factor (multiplier)
/// \param[out] out upscaled field component
///////////////////////////////////////////////////////////////////////////////
static void Upscale(const float *src, int width, int height, int stride,
int newWidth, int newHeight, int newStride, float scale,
float *out) {
for (int i = 0; i < newHeight; ++i) {
for (int j = 0; j < newWidth; ++j) {
// position within smaller image
float x = ((float)j - 0.5f) * 0.5f;
float y = ((float)i - 0.5f) * 0.5f;
out[j + i * newStride] = Tex2D(src, width, height, stride, x, y) * scale;
}
}
}
///////////////////////////////////////////////////////////////////////////////
/// \brief warp image with provided vector field
///
/// For each output pixel there is a vector which tells which pixel
/// from a source image should be mapped to this particular output
/// pixel.
/// It is assumed that images and the vector field have the same stride and
/// resolution.
/// \param[in] src source image
/// \param[in] w width
/// \param[in] h height
/// \param[in] s stride
/// \param[in] u horizontal displacement
/// \param[in] v vertical displacement
/// \param[out] out warped image
///////////////////////////////////////////////////////////////////////////////
static void WarpImage(const float *src, int w, int h, int s, const float *u,
const float *v, float *out) {
for (int i = 0; i < h; ++i) {
for (int j = 0; j < w; ++j) {
const int pos = j + i * s;
// warped coords
float x = (float)j + u[pos];
float y = (float)i + v[pos];
out[pos] = Tex2D(src, w, h, s, x, y);
}
}
}
///////////////////////////////////////////////////////////////////////////////
/// \brief computes image derivatives for a pair of images
/// \param[in] I0 source image
/// \param[in] I1 tracked image
/// \param[in] w images width
/// \param[in] h images height
/// \param[in] s images stride
/// \param[out] Ix x derivative
/// \param[out] Iy y derivative
/// \param[out] Iz temporal derivative
///////////////////////////////////////////////////////////////////////////////
static void ComputeDerivatives(const float *I0, const float *I1, int w, int h,
int s, float *Ix, float *Iy, float *Iz) {
for (int i = 0; i < h; ++i) {
for (int j = 0; j < w; ++j) {
const int pos = j + i * s;
float t0, t1;
// derivative filter is (1, -8, 0, 8, -1)/12
// x derivative
t0 = Tex2Di(I0, w, h, s, j - 2, i);
t0 -= Tex2Di(I0, w, h, s, j - 1, i) * 8.0f;
t0 += Tex2Di(I0, w, h, s, j + 1, i) * 8.0f;
t0 -= Tex2Di(I0, w, h, s, j + 2, i);
t0 /= 12.0f;
t1 = Tex2Di(I1, w, h, s, j - 2, i);
t1 -= Tex2Di(I1, w, h, s, j - 1, i) * 8.0f;
t1 += Tex2Di(I1, w, h, s, j + 1, i) * 8.0f;
t1 -= Tex2Di(I1, w, h, s, j + 2, i);
t1 /= 12.0f;
// spatial derivatives are averaged
Ix[pos] = (t0 + t1) * 0.5f;
// t derivative
Iz[pos] = I1[pos] - I0[pos];
// y derivative
t0 = Tex2Di(I0, w, h, s, j, i - 2);
t0 -= Tex2Di(I0, w, h, s, j, i - 1) * 8.0f;
t0 += Tex2Di(I0, w, h, s, j, i + 1) * 8.0f;
t0 -= Tex2Di(I0, w, h, s, j, i + 2);
t0 /= 12.0f;
t1 = Tex2Di(I1, w, h, s, j, i - 2);
t1 -= Tex2Di(I1, w, h, s, j, i - 1) * 8.0f;
t1 += Tex2Di(I1, w, h, s, j, i + 1) * 8.0f;
t1 -= Tex2Di(I1, w, h, s, j, i + 2);
t1 /= 12.0f;
Iy[pos] = (t0 + t1) * 0.5f;
}
}
}
///////////////////////////////////////////////////////////////////////////////
/// \brief one iteration of classical Horn-Schunck method
///
/// It is one iteration of Jacobi method for a corresponding linear system
/// \param[in] du0 current horizontal displacement approximation
/// \param[in] dv0 current vertical displacement approximation
/// \param[in] Ix image x derivative
/// \param[in] Iy image y derivative
/// \param[in] Iz temporal derivative
/// \param[in] w width
/// \param[in] h height
/// \param[in] s stride
/// \param[in] alpha degree of smoothness
/// \param[out] du1 new horizontal displacement approximation
/// \param[out] dv1 new vertical displacement approximation
///////////////////////////////////////////////////////////////////////////////
static void SolveForUpdate(const float *du0, const float *dv0, const float *Ix,
const float *Iy, const float *Iz, int w, int h,
int s, float alpha, float *du1, float *dv1) {
for (int i = 0; i < h; ++i) {
for (int j = 0; j < w; ++j) {
const int pos = j + i * s;
int left, right, up, down;
// handle borders
if (j != 0)
left = pos - 1;
else
left = pos;
if (j != w - 1)
right = pos + 1;
else
right = pos;
if (i != 0)
down = pos - s;
else
down = pos;
if (i != h - 1)
up = pos + s;
else
up = pos;
float sumU = (du0[left] + du0[right] + du0[up] + du0[down]) * 0.25f;
float sumV = (dv0[left] + dv0[right] + dv0[up] + dv0[down]) * 0.25f;
float frac = (Ix[pos] * sumU + Iy[pos] * sumV + Iz[pos]) /
(Ix[pos] * Ix[pos] + Iy[pos] * Iy[pos] + alpha);
du1[pos] = sumU - Ix[pos] * frac;
dv1[pos] = sumV - Iy[pos] * frac;
}
}
}
///////////////////////////////////////////////////////////////////////////////
/// \brief method logic
///
/// handles memory allocation and control flow
/// \param[in] I0 source image
/// \param[in] I1 tracked image
/// \param[in] width images width
/// \param[in] height images height
/// \param[in] stride images stride
/// \param[in] alpha degree of displacement field smoothness
/// \param[in] nLevels number of levels in a pyramid
/// \param[in] nWarpIters number of warping iterations per pyramid level
/// \param[in] nSolverIters number of solver iterations (Jacobi iterations)
/// \param[out] u horizontal displacement
/// \param[out] v vertical displacement
///////////////////////////////////////////////////////////////////////////////
void ComputeFlowGold(const float *I0, const float *I1, int width, int height,
int stride, float alpha, int nLevels, int nWarpIters,
int nSolverIters, float *u, float *v) {
printf("Computing optical flow on CPU...\n");
float *u0 = u;
float *v0 = v;
const float **pI0 = new const float *[nLevels];
const float **pI1 = new const float *[nLevels];
int *pW = new int[nLevels];
int *pH = new int[nLevels];
int *pS = new int[nLevels];
const int pixelCountAligned = height * stride;
float *tmp = new float[pixelCountAligned];
float *du0 = new float[pixelCountAligned];
float *dv0 = new float[pixelCountAligned];
float *du1 = new float[pixelCountAligned];
float *dv1 = new float[pixelCountAligned];
float *Ix = new float[pixelCountAligned];
float *Iy = new float[pixelCountAligned];
float *Iz = new float[pixelCountAligned];
float *nu = new float[pixelCountAligned];
float *nv = new float[pixelCountAligned];
// prepare pyramid
int currentLevel = nLevels - 1;
pI0[currentLevel] = I0;
pI1[currentLevel] = I1;
pW[currentLevel] = width;
pH[currentLevel] = height;
pS[currentLevel] = stride;
for (; currentLevel > 0; --currentLevel) {
int nw = pW[currentLevel] / 2;
int nh = pH[currentLevel] / 2;
int ns = iAlignUp(nw);
pI0[currentLevel - 1] = new float[ns * nh];
pI1[currentLevel - 1] = new float[ns * nh];
Downscale(pI0[currentLevel], pW[currentLevel], pH[currentLevel],
pS[currentLevel], nw, nh, ns, (float *)pI0[currentLevel - 1]);
Downscale(pI1[currentLevel], pW[currentLevel], pH[currentLevel],
pS[currentLevel], nw, nh, ns, (float *)pI1[currentLevel - 1]);
pW[currentLevel - 1] = nw;
pH[currentLevel - 1] = nh;
pS[currentLevel - 1] = ns;
}
// initial approximation
memset(u, 0, stride * height * sizeof(float));
memset(v, 0, stride * height * sizeof(float));
// compute flow
for (; currentLevel < nLevels; ++currentLevel) {
for (int warpIter = 0; warpIter < nWarpIters; ++warpIter) {
memset(du0, 0, pixelCountAligned * sizeof(float));
memset(dv0, 0, pixelCountAligned * sizeof(float));
memset(du1, 0, pixelCountAligned * sizeof(float));
memset(dv1, 0, pixelCountAligned * sizeof(float));
WarpImage(pI1[currentLevel], pW[currentLevel], pH[currentLevel],
pS[currentLevel], u, v, tmp);
// on current level we compute optical flow
// between frame 0 and warped frame 1
ComputeDerivatives(pI0[currentLevel], tmp, pW[currentLevel],
pH[currentLevel], pS[currentLevel], Ix, Iy, Iz);
for (int iter = 0; iter < nSolverIters; ++iter) {
SolveForUpdate(du0, dv0, Ix, Iy, Iz, pW[currentLevel], pH[currentLevel],
pS[currentLevel], alpha, du1, dv1);
Swap(du0, du1);
Swap(dv0, dv1);
}
// update u, v
for (int i = 0; i < pH[currentLevel] * pS[currentLevel]; ++i) {
u[i] += du0[i];
v[i] += dv0[i];
}
} // end for (int warpIter = 0; warpIter < nWarpIters; ++warpIter)
if (currentLevel != nLevels - 1) {
// prolongate solution
float scaleX = (float)pW[currentLevel + 1] / (float)pW[currentLevel];
Upscale(u, pW[currentLevel], pH[currentLevel], pS[currentLevel],
pW[currentLevel + 1], pH[currentLevel + 1], pS[currentLevel + 1],
scaleX, nu);
float scaleY = (float)pH[currentLevel + 1] / (float)pH[currentLevel];
Upscale(v, pW[currentLevel], pH[currentLevel], pS[currentLevel],
pW[currentLevel + 1], pH[currentLevel + 1], pS[currentLevel + 1],
scaleY, nv);
Swap(u, nu);
Swap(v, nv);
}
} // end for (; currentLevel < nLevels; ++currentLevel)
if (u != u0) {
// solution is not in the specified array
// copy
memcpy(u0, u, pixelCountAligned * sizeof(float));
memcpy(v0, v, pixelCountAligned * sizeof(float));
Swap(u, nu);
Swap(v, nv);
}
// cleanup
// last level is not being freed here
// because it refers to input images
for (int i = 0; i < nLevels - 1; ++i) {
delete[] pI0[i];
delete[] pI1[i];
}
delete[] pI0;
delete[] pI1;
delete[] pW;
delete[] pH;
delete[] pS;
delete[] tmp;
delete[] du0;
delete[] dv0;
delete[] du1;
delete[] dv1;
delete[] Ix;
delete[] Iy;
delete[] Iz;
delete[] nu;
delete[] nv;
}