mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2025-01-19 22:25:49 +08:00
463 lines
16 KiB
C++
463 lines
16 KiB
C++
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* * Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* * Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
|
* contributors may be used to endorse or promote products derived
|
|
* from this software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
|
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
|
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include "common.h"
|
|
#include "flowGold.h"
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
/// \brief fold an arbitrary integer coordinate into [0, n) by mirroring
///
/// The axis is extended by reflection with the border texel duplicated:
/// ..., 1, 0 | 0, 1, ..., n-1 | n-1, n-2, ...
/// The extension is periodic with period 2*n, so every coordinate maps to a
/// valid index no matter how far outside the image it lies.
/// \param[in]  i  coordinate to fold (may be negative or >= n)
/// \param[in]  n  axis extent, expected to be positive
/// \return index in [0, n); 0 when n <= 1
///////////////////////////////////////////////////////////////////////////////
static inline int Tex2DMirror(int i, int n) {
  if (n <= 1) return 0;

  const int period = 2 * n;
  int m = i % period;

  // '%' keeps the sign of the dividend, bring the remainder into [0, period)
  if (m < 0) m += period;

  // first half of the period is the image itself, second half its reflection
  return (m < n) ? m : period - 1 - m;
}

///////////////////////////////////////////////////////////////////////////////
/// \brief host texture fetch
///
/// read from arbitrary position within image using bilinear interpolation
/// out of range coords are mirrored (see Tex2DMirror); both corners of the
/// interpolation cell are mirrored independently, so positions outside the
/// image interpolate between the texels a mirrored image would contain
/// \param[in]  t   texture raw data
/// \param[in]  w   texture width
/// \param[in]  h   texture height
/// \param[in]  s   texture stride
/// \param[in]  x   x coord of the point to fetch value at
/// \param[in]  y   y coord of the point to fetch value at
/// \return fetched value
///////////////////////////////////////////////////////////////////////////////
inline float Tex2D(const float *t, int w, int h, int s, float x, float y) {
  // lower corner of the interpolation cell; floor (rather than truncation
  // toward zero) keeps corner and weights consistent for negative coords
  const float cellX = floorf(x);
  const float cellY = floorf(y);

  // interpolation weights, always in [0, 1]
  const float dx = x - cellX;
  const float dy = y - cellY;

  // assume pixels are squares
  const int baseX = (int)cellX;
  const int baseY = (int)cellY;

  // mirror each corner on its own: the neighbour of a mirrored texel lies in
  // the opposite direction, so it cannot be derived from the mirrored corner
  const int ix0 = Tex2DMirror(baseX, w);
  const int iy0 = Tex2DMirror(baseY, h);
  const int ix1 = Tex2DMirror(baseX + 1, w);
  const int iy1 = Tex2DMirror(baseY + 1, h);

  float res = t[ix0 + iy0 * s] * (1.0f - dx) * (1.0f - dy);
  res += t[ix1 + iy0 * s] * dx * (1.0f - dy);
  res += t[ix0 + iy1 * s] * (1.0f - dx) * dy;
  res += t[ix1 + iy1 * s] * dx * dy;

  return res;
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
/// \brief fold an arbitrary integer coordinate into [0, n) by mirroring
///
/// The axis is extended by reflection with the border texel duplicated:
/// ..., 1, 0 | 0, 1, ..., n-1 | n-1, n-2, ...
/// The extension is periodic with period 2*n, so every coordinate maps to a
/// valid index no matter how far outside the image it lies.
/// \param[in]  i  coordinate to fold (may be negative or >= n)
/// \param[in]  n  axis extent, expected to be positive
/// \return index in [0, n); 0 when n <= 1
///////////////////////////////////////////////////////////////////////////////
static inline int Tex2DiMirror(int i, int n) {
  if (n <= 1) return 0;

  const int period = 2 * n;
  int m = i % period;

  // '%' keeps the sign of the dividend, bring the remainder into [0, period)
  if (m < 0) m += period;

  // first half of the period is the image itself, second half its reflection
  return (m < n) ? m : period - 1 - m;
}

///////////////////////////////////////////////////////////////////////////////
/// \brief host texture fetch
///
/// read specific texel value
/// out of range coords are mirrored; the mirroring is periodic, so coords
/// that lie several image extents outside still map to a valid texel
/// \param[in]  src texture raw data
/// \param[in]  w   texture width
/// \param[in]  h   texture height
/// \param[in]  s   texture stride
/// \param[in]  x   x coord of the texel to fetch
/// \param[in]  y   y coord of the texel to fetch
/// \return fetched value
///////////////////////////////////////////////////////////////////////////////
inline float Tex2Di(const float *src, int w, int h, int s, int x, int y) {
  const int col = Tex2DiMirror(x, w);
  const int row = Tex2DiMirror(y, h);

  return src[col + row * s];
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
/// \brief resize image
|
|
/// \param[in] src image to downscale
|
|
/// \param[in] width image width
|
|
/// \param[in] height image height
|
|
/// \param[in] stride image stride
|
|
/// \param[in] newWidth image new width
|
|
/// \param[in] newHeight image new height
|
|
/// \param[in] newStride image new stride
|
|
/// \param[out] out downscaled image data
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
static void Downscale(const float *src, int width, int height, int stride,
|
|
int newWidth, int newHeight, int newStride, float *out) {
|
|
for (int i = 0; i < newHeight; ++i) {
|
|
for (int j = 0; j < newWidth; ++j) {
|
|
const int srcX = j * 2;
|
|
const int srcY = i * 2;
|
|
// average 4 neighbouring pixels
|
|
float sum;
|
|
sum = Tex2Di(src, width, height, stride, srcX + 0, srcY + 0);
|
|
sum += Tex2Di(src, width, height, stride, srcX + 0, srcY + 1);
|
|
sum += Tex2Di(src, width, height, stride, srcX + 1, srcY + 0);
|
|
sum += Tex2Di(src, width, height, stride, srcX + 1, srcY + 1);
|
|
// normalize
|
|
sum *= 0.25f;
|
|
out[j + i * newStride] = sum;
|
|
}
|
|
}
|
|
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
/// \brief upscale one component of a displacement field
|
|
/// \param[in] src field component to upscale
|
|
/// \param[in] width field current width
|
|
/// \param[in] height field current height
|
|
/// \param[in] stride field current stride
|
|
/// \param[in] newWidth field new width
|
|
/// \param[in] newHeight field new height
|
|
/// \param[in] newStride field new stride
|
|
/// \param[in] scale value scale factor (multiplier)
|
|
/// \param[out] out upscaled field component
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
static void Upscale(const float *src, int width, int height, int stride,
|
|
int newWidth, int newHeight, int newStride, float scale,
|
|
float *out) {
|
|
for (int i = 0; i < newHeight; ++i) {
|
|
for (int j = 0; j < newWidth; ++j) {
|
|
// position within smaller image
|
|
float x = ((float)j - 0.5f) * 0.5f;
|
|
float y = ((float)i - 0.5f) * 0.5f;
|
|
|
|
out[j + i * newStride] = Tex2D(src, width, height, stride, x, y) * scale;
|
|
}
|
|
}
|
|
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
/// \brief warp image with provided vector field
|
|
///
|
|
/// For each output pixel there is a vector which tells which pixel
|
|
/// from a source image should be mapped to this particular output
|
|
/// pixel.
|
|
/// It is assumed that images and the vector field have the same stride and
|
|
/// resolution.
|
|
/// \param[in] src source image
|
|
/// \param[in] w width
|
|
/// \param[in] h height
|
|
/// \param[in] s stride
|
|
/// \param[in] u horizontal displacement
|
|
/// \param[in] v vertical displacement
|
|
/// \param[out] out warped image
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
static void WarpImage(const float *src, int w, int h, int s, const float *u,
|
|
const float *v, float *out) {
|
|
for (int i = 0; i < h; ++i) {
|
|
for (int j = 0; j < w; ++j) {
|
|
const int pos = j + i * s;
|
|
// warped coords
|
|
float x = (float)j + u[pos];
|
|
float y = (float)i + v[pos];
|
|
|
|
out[pos] = Tex2D(src, w, h, s, x, y);
|
|
}
|
|
}
|
|
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
/// \brief computes image derivatives for a pair of images
|
|
/// \param[in] I0 source image
|
|
/// \param[in] I1 tracked image
|
|
/// \param[in] w images width
|
|
/// \param[in] h images height
|
|
/// \param[in] s images stride
|
|
/// \param[out] Ix x derivative
|
|
/// \param[out] Iy y derivative
|
|
/// \param[out] Iz temporal derivative
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
static void ComputeDerivatives(const float *I0, const float *I1, int w, int h,
|
|
int s, float *Ix, float *Iy, float *Iz) {
|
|
for (int i = 0; i < h; ++i) {
|
|
for (int j = 0; j < w; ++j) {
|
|
const int pos = j + i * s;
|
|
float t0, t1;
|
|
// derivative filter is (1, -8, 0, 8, -1)/12
|
|
// x derivative
|
|
t0 = Tex2Di(I0, w, h, s, j - 2, i);
|
|
t0 -= Tex2Di(I0, w, h, s, j - 1, i) * 8.0f;
|
|
t0 += Tex2Di(I0, w, h, s, j + 1, i) * 8.0f;
|
|
t0 -= Tex2Di(I0, w, h, s, j + 2, i);
|
|
t0 /= 12.0f;
|
|
|
|
t1 = Tex2Di(I1, w, h, s, j - 2, i);
|
|
t1 -= Tex2Di(I1, w, h, s, j - 1, i) * 8.0f;
|
|
t1 += Tex2Di(I1, w, h, s, j + 1, i) * 8.0f;
|
|
t1 -= Tex2Di(I1, w, h, s, j + 2, i);
|
|
t1 /= 12.0f;
|
|
|
|
// spatial derivatives are averaged
|
|
Ix[pos] = (t0 + t1) * 0.5f;
|
|
|
|
// t derivative
|
|
Iz[pos] = I1[pos] - I0[pos];
|
|
|
|
// y derivative
|
|
t0 = Tex2Di(I0, w, h, s, j, i - 2);
|
|
t0 -= Tex2Di(I0, w, h, s, j, i - 1) * 8.0f;
|
|
t0 += Tex2Di(I0, w, h, s, j, i + 1) * 8.0f;
|
|
t0 -= Tex2Di(I0, w, h, s, j, i + 2);
|
|
t0 /= 12.0f;
|
|
|
|
t1 = Tex2Di(I1, w, h, s, j, i - 2);
|
|
t1 -= Tex2Di(I1, w, h, s, j, i - 1) * 8.0f;
|
|
t1 += Tex2Di(I1, w, h, s, j, i + 1) * 8.0f;
|
|
t1 -= Tex2Di(I1, w, h, s, j, i + 2);
|
|
t1 /= 12.0f;
|
|
|
|
Iy[pos] = (t0 + t1) * 0.5f;
|
|
}
|
|
}
|
|
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
/// \brief one iteration of classical Horn-Schunck method
///
/// It is one iteration of Jacobi method for a corresponding linear system:
/// every pixel is updated from the four-neighbour average of the previous
/// approximation, read from du0/dv0 and written to du1/dv1.
/// At the image border a missing neighbour is replaced by the pixel itself.
/// \param[in]  du0     current horizontal displacement approximation
/// \param[in]  dv0     current vertical displacement approximation
/// \param[in]  Ix      image x derivative
/// \param[in]  Iy      image y derivative
/// \param[in]  Iz      temporal derivative
/// \param[in]  w       width
/// \param[in]  h       height
/// \param[in]  s       stride
/// \param[in]  alpha   degree of smoothness
/// \param[out] du1     new horizontal displacement approximation
/// \param[out] dv1     new vertical displacement approximation
///////////////////////////////////////////////////////////////////////////////
static void SolveForUpdate(const float *du0, const float *dv0, const float *Ix,
                           const float *Iy, const float *Iz, int w, int h,
                           int s, float alpha, float *du1, float *dv1) {
  const int lastCol = w - 1;
  const int lastRow = h - 1;

  for (int row = 0; row < h; ++row) {
    for (int col = 0; col < w; ++col) {
      const int pos = col + row * s;

      // neighbour indices; on a border the pixel stands in for the
      // neighbour that would fall outside the image
      const int left = (col == 0) ? pos : pos - 1;
      const int right = (col == lastCol) ? pos : pos + 1;
      const int down = (row == 0) ? pos : pos - s;
      const int up = (row == lastRow) ? pos : pos + s;

      // four-neighbour averages of the previous approximation
      const float sumU =
          (du0[left] + du0[right] + du0[up] + du0[down]) * 0.25f;
      const float sumV =
          (dv0[left] + dv0[right] + dv0[up] + dv0[down]) * 0.25f;

      const float numerator = Ix[pos] * sumU + Iy[pos] * sumV + Iz[pos];
      const float denominator = Ix[pos] * Ix[pos] + Iy[pos] * Iy[pos] + alpha;
      const float frac = numerator / denominator;

      du1[pos] = sumU - Ix[pos] * frac;
      dv1[pos] = sumV - Iy[pos] * frac;
    }
  }
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
/// \brief method logic
|
|
///
|
|
/// handles memory allocation and control flow
|
|
/// \param[in] I0 source image
|
|
/// \param[in] I1 tracked image
|
|
/// \param[in] width images width
|
|
/// \param[in] height images height
|
|
/// \param[in] stride images stride
|
|
/// \param[in] alpha degree of displacement field smoothness
|
|
/// \param[in] nLevels number of levels in a pyramid
|
|
/// \param[in] nWarpIters number of warping iterations per pyramid level
|
|
/// \param[in] nSolverIters number of solver iterations (Jacobi iterations)
|
|
/// \param[out] u horizontal displacement
|
|
/// \param[out] v vertical displacement
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
void ComputeFlowGold(const float *I0, const float *I1, int width, int height,
|
|
int stride, float alpha, int nLevels, int nWarpIters,
|
|
int nSolverIters, float *u, float *v) {
|
|
printf("Computing optical flow on CPU...\n");
|
|
|
|
float *u0 = u;
|
|
float *v0 = v;
|
|
|
|
const float **pI0 = new const float *[nLevels];
|
|
const float **pI1 = new const float *[nLevels];
|
|
|
|
int *pW = new int[nLevels];
|
|
int *pH = new int[nLevels];
|
|
int *pS = new int[nLevels];
|
|
|
|
const int pixelCountAligned = height * stride;
|
|
|
|
float *tmp = new float[pixelCountAligned];
|
|
float *du0 = new float[pixelCountAligned];
|
|
float *dv0 = new float[pixelCountAligned];
|
|
float *du1 = new float[pixelCountAligned];
|
|
float *dv1 = new float[pixelCountAligned];
|
|
float *Ix = new float[pixelCountAligned];
|
|
float *Iy = new float[pixelCountAligned];
|
|
float *Iz = new float[pixelCountAligned];
|
|
float *nu = new float[pixelCountAligned];
|
|
float *nv = new float[pixelCountAligned];
|
|
|
|
// prepare pyramid
|
|
int currentLevel = nLevels - 1;
|
|
pI0[currentLevel] = I0;
|
|
pI1[currentLevel] = I1;
|
|
|
|
pW[currentLevel] = width;
|
|
pH[currentLevel] = height;
|
|
pS[currentLevel] = stride;
|
|
|
|
for (; currentLevel > 0; --currentLevel) {
|
|
int nw = pW[currentLevel] / 2;
|
|
int nh = pH[currentLevel] / 2;
|
|
int ns = iAlignUp(nw);
|
|
pI0[currentLevel - 1] = new float[ns * nh];
|
|
pI1[currentLevel - 1] = new float[ns * nh];
|
|
|
|
Downscale(pI0[currentLevel], pW[currentLevel], pH[currentLevel],
|
|
pS[currentLevel], nw, nh, ns, (float *)pI0[currentLevel - 1]);
|
|
|
|
Downscale(pI1[currentLevel], pW[currentLevel], pH[currentLevel],
|
|
pS[currentLevel], nw, nh, ns, (float *)pI1[currentLevel - 1]);
|
|
|
|
pW[currentLevel - 1] = nw;
|
|
pH[currentLevel - 1] = nh;
|
|
pS[currentLevel - 1] = ns;
|
|
}
|
|
|
|
// initial approximation
|
|
memset(u, 0, stride * height * sizeof(float));
|
|
memset(v, 0, stride * height * sizeof(float));
|
|
|
|
// compute flow
|
|
for (; currentLevel < nLevels; ++currentLevel) {
|
|
for (int warpIter = 0; warpIter < nWarpIters; ++warpIter) {
|
|
memset(du0, 0, pixelCountAligned * sizeof(float));
|
|
memset(dv0, 0, pixelCountAligned * sizeof(float));
|
|
|
|
memset(du1, 0, pixelCountAligned * sizeof(float));
|
|
memset(dv1, 0, pixelCountAligned * sizeof(float));
|
|
|
|
WarpImage(pI1[currentLevel], pW[currentLevel], pH[currentLevel],
|
|
pS[currentLevel], u, v, tmp);
|
|
|
|
// on current level we compute optical flow
|
|
// between frame 0 and warped frame 1
|
|
ComputeDerivatives(pI0[currentLevel], tmp, pW[currentLevel],
|
|
pH[currentLevel], pS[currentLevel], Ix, Iy, Iz);
|
|
|
|
for (int iter = 0; iter < nSolverIters; ++iter) {
|
|
SolveForUpdate(du0, dv0, Ix, Iy, Iz, pW[currentLevel], pH[currentLevel],
|
|
pS[currentLevel], alpha, du1, dv1);
|
|
Swap(du0, du1);
|
|
Swap(dv0, dv1);
|
|
}
|
|
|
|
// update u, v
|
|
for (int i = 0; i < pH[currentLevel] * pS[currentLevel]; ++i) {
|
|
u[i] += du0[i];
|
|
v[i] += dv0[i];
|
|
}
|
|
} // end for (int warpIter = 0; warpIter < nWarpIters; ++warpIter)
|
|
|
|
if (currentLevel != nLevels - 1) {
|
|
// prolongate solution
|
|
float scaleX = (float)pW[currentLevel + 1] / (float)pW[currentLevel];
|
|
|
|
Upscale(u, pW[currentLevel], pH[currentLevel], pS[currentLevel],
|
|
pW[currentLevel + 1], pH[currentLevel + 1], pS[currentLevel + 1],
|
|
scaleX, nu);
|
|
|
|
float scaleY = (float)pH[currentLevel + 1] / (float)pH[currentLevel];
|
|
|
|
Upscale(v, pW[currentLevel], pH[currentLevel], pS[currentLevel],
|
|
pW[currentLevel + 1], pH[currentLevel + 1], pS[currentLevel + 1],
|
|
scaleY, nv);
|
|
|
|
Swap(u, nu);
|
|
Swap(v, nv);
|
|
}
|
|
} // end for (; currentLevel < nLevels; ++currentLevel)
|
|
|
|
if (u != u0) {
|
|
// solution is not in the specified array
|
|
// copy
|
|
memcpy(u0, u, pixelCountAligned * sizeof(float));
|
|
memcpy(v0, v, pixelCountAligned * sizeof(float));
|
|
Swap(u, nu);
|
|
Swap(v, nv);
|
|
}
|
|
|
|
// cleanup
|
|
// last level is not being freed here
|
|
// because it refers to input images
|
|
for (int i = 0; i < nLevels - 1; ++i) {
|
|
delete[] pI0[i];
|
|
delete[] pI1[i];
|
|
}
|
|
|
|
delete[] pI0;
|
|
delete[] pI1;
|
|
delete[] pW;
|
|
delete[] pH;
|
|
delete[] pS;
|
|
delete[] tmp;
|
|
delete[] du0;
|
|
delete[] dv0;
|
|
delete[] du1;
|
|
delete[] dv1;
|
|
delete[] Ix;
|
|
delete[] Iy;
|
|
delete[] Iz;
|
|
delete[] nu;
|
|
delete[] nv;
|
|
}
|