mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2024-11-28 16:39:15 +08:00
782 lines
21 KiB
C++
782 lines
21 KiB
C++
|
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||
|
*
|
||
|
* Redistribution and use in source and binary forms, with or without
|
||
|
* modification, are permitted provided that the following conditions
|
||
|
* are met:
|
||
|
* * Redistributions of source code must retain the above copyright
|
||
|
* notice, this list of conditions and the following disclaimer.
|
||
|
* * Redistributions in binary form must reproduce the above copyright
|
||
|
* notice, this list of conditions and the following disclaimer in the
|
||
|
* documentation and/or other materials provided with the distribution.
|
||
|
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||
|
* contributors may be used to endorse or promote products derived
|
||
|
* from this software without specific prior written permission.
|
||
|
*
|
||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||
|
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||
|
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||
|
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||
|
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||
|
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||
|
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||
|
*/
|
||
|
|
||
|
/*
|
||
|
Bicubic texture filtering sample
|
||
|
sgreen 6/2008
|
||
|
|
||
|
This sample demonstrates how to efficiently implement bicubic texture
|
||
|
filtering in CUDA.
|
||
|
|
||
|
Bicubic filtering is a higher order interpolation method that produces
|
||
|
smoother results than bilinear interpolation:
|
||
|
http://en.wikipedia.org/wiki/Bicubic
|
||
|
|
||
|
It requires reading a 4 x 4 pixel neighbourhood rather than the
|
||
|
2 x 2 area required by bilinear filtering.
|
||
|
|
||
|
Current graphics hardware doesn't support bicubic filtering natively,
|
||
|
but it is possible to compose a bicubic filter using just 4 bilinear
|
||
|
lookups by offsetting the sample position within each texel and weighting
|
||
|
the samples correctly. The only disadvantage to this method is that the
|
||
|
hardware only maintains 9-bits of filtering precision within each texel.
|
||
|
|
||
|
See "Fast Third-Order Texture Filtering", Sigg & Hadwiger, GPU Gems 2:
|
||
|
https://developer.nvidia.com/gpugems/gpugems2/part-iii-high-quality-rendering/chapter-20-fast-third-order-texture-filtering
|
||
|
|
||
|
v1.1 - updated to include the brute force method using 16 texture lookups.
|
||
|
v1.2 - added Catmull-Rom interpolation
|
||
|
|
||
|
Example performance results from GeForce 8800 GTS:
|
||
|
Bilinear - 5500 MPixels/sec
|
||
|
Bicubic - 1400 MPixels/sec
|
||
|
Fast Bicubic - 2100 MPixels/sec
|
||
|
*/
|
||
|
|
||
|
// OpenGL Graphics includes
|
||
|
#include <helper_gl.h>
|
||
|
#if defined(__APPLE__) || defined(MACOSX)
|
||
|
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
|
||
|
#include <GLUT/glut.h>
|
||
|
#else
|
||
|
#include <GL/freeglut.h>
|
||
|
#endif
|
||
|
|
||
|
// Includes
|
||
|
#include <stdlib.h>
|
||
|
#include <stdio.h>
|
||
|
#include <string.h>
|
||
|
#include <math.h>
|
||
|
|
||
|
// CUDA system and GL includes
|
||
|
#include <cuda_runtime.h>
|
||
|
#include <cuda_gl_interop.h>
|
||
|
|
||
|
// Helper functions
|
||
|
#include <helper_functions.h> // CUDA SDK Helper functions
|
||
|
#include <helper_cuda.h> // CUDA device initialization helper functions
|
||
|
|
||
|
typedef unsigned int uint;
|
||
|
typedef unsigned char uchar;
|
||
|
|
||
|
#define USE_BUFFER_TEX 0
|
||
|
#ifndef MAX
// Evaluates to the larger of a and b. Every use of an argument is
// parenthesized so that operands containing lower-precedence operators
// (for example `x | y` or `c ? p : q`) are compared as a whole.
// As with any function-like macro, an argument may be evaluated more than
// once, so do not pass expressions with side effects.
#define MAX(a, b) (((a) < (b)) ? (b) : (a))
#endif
|
||
|
|
||
|
// Auto-Verification Code
|
||
|
const int frameCheckNumber = 4;
|
||
|
int fpsCount = 0; // FPS count for averaging
|
||
|
int fpsLimit = 4; // FPS limit for sampling
|
||
|
int g_Index = 0;
|
||
|
unsigned int frameCount = 0;
|
||
|
unsigned int g_TotalErrors = 0;
|
||
|
StopWatchInterface *timer = 0;
|
||
|
bool g_Verify = false;
|
||
|
|
||
|
int *pArgc = NULL;
|
||
|
char **pArgv = NULL;
|
||
|
|
||
|
#define MAX_EPSILON_ERROR 5.0f
|
||
|
#define REFRESH_DELAY 10 // ms
|
||
|
|
||
|
static const char *sSDKsample = "CUDA BicubicTexture";
|
||
|
|
||
|
// Define the files that are to be saved and the reference images for validation
|
||
|
const char *sFilterMode[] = {"Nearest", "Bilinear", "Bicubic",
|
||
|
"Fast Bicubic", "Catmull-Rom", NULL};
|
||
|
|
||
|
const char *sOriginal[] = {"0_nearest.ppm", "1_bilinear.ppm",
|
||
|
"2_bicubic.ppm", "3_fastbicubic.ppm",
|
||
|
"4_catmull-rom.ppm", NULL};
|
||
|
|
||
|
const char *sReference[] = {"0_nearest.ppm", "1_bilinear.ppm",
|
||
|
"2_bicubic.ppm", "3_fastbicubic.ppm",
|
||
|
"4_catmull-rom.ppm", NULL};
|
||
|
|
||
|
const char *srcImageFilename = "lena_bw.pgm";
|
||
|
char *dumpFilename = NULL;
|
||
|
|
||
|
uint width = 512, height = 512;
|
||
|
uint imageWidth, imageHeight;
|
||
|
dim3 blockSize(16, 16);
|
||
|
dim3 gridSize(width / blockSize.x, height / blockSize.y);
|
||
|
|
||
|
// Texture filtering methods selectable at run time.
// The numeric value of each mode is used as an index into sFilterMode, so the
// order here must stay in step with that table. NUM_MODES is the count of
// real modes and doubles as the exclusive upper bound for validation.
enum eFilterMode {
  MODE_NEAREST = 0,
  MODE_BILINEAR = 1,
  MODE_BICUBIC = 2,
  MODE_FAST_BICUBIC = 3,
  MODE_CATMULL_ROM = 4,
  NUM_MODES = 5
};
|
||
|
|
||
|
eFilterMode g_FilterMode = MODE_FAST_BICUBIC;
|
||
|
|
||
|
bool drawCurves = false;
|
||
|
|
||
|
GLuint pbo = 0; // OpenGL pixel buffer object
|
||
|
struct cudaGraphicsResource *cuda_pbo_resource; // handles OpenGL-CUDA exchange
|
||
|
GLuint displayTex = 0;
|
||
|
GLuint bufferTex = 0;
|
||
|
GLuint fprog; // fragment program (shader)
|
||
|
|
||
|
float tx = 9.0f, ty = 10.0f; // image translation
|
||
|
float scale = 1.0f / 16.0f; // image scale
|
||
|
float cx, cy; // image centre
|
||
|
|
||
|
void display();
|
||
|
void initGLBuffers();
|
||
|
void runBenchmark(int iterations);
|
||
|
void cleanup();
|
||
|
|
||
|
#define GL_TEXTURE_TYPE GL_TEXTURE_RECTANGLE_ARB
|
||
|
//#define GL_TEXTURE_TYPE GL_TEXTURE_2D
|
||
|
|
||
|
extern "C" void initGL(int *argc, char **argv);
|
||
|
extern "C" void loadImageData(int argc, char **argv);
|
||
|
|
||
|
extern "C" void initTexture(int imageWidth, int imageHeight, uchar *h_data);
|
||
|
extern "C" void freeTexture();
|
||
|
extern "C" void render(int width, int height, float tx, float ty, float scale,
|
||
|
float cx, float cy, dim3 blockSize, dim3 gridSize,
|
||
|
eFilterMode filter_mode, uchar4 *output);
|
||
|
|
||
|
// w0, w1, w2, and w3 are the four cubic B-spline basis functions
|
||
|
// Cubic B-spline basis function w0:
//   w0(a) = (-a^3 + 3a^2 - 3a + 1) / 6
float bspline_w0(float a) {
  const float poly = -a * a * a + 3.0f * a * a - 3.0f * a + 1.0f;
  return (1.0f / 6.0f) * poly;
}
|
||
|
|
||
|
// Cubic B-spline basis function w1:
//   w1(a) = (3a^3 - 6a^2 + 4) / 6
float bspline_w1(float a) {
  const float poly = 3.0f * a * a * a - 6.0f * a * a + 4.0f;
  return (1.0f / 6.0f) * poly;
}
|
||
|
|
||
|
// Cubic B-spline basis function w2:
//   w2(a) = (-3a^3 + 3a^2 + 3a + 1) / 6
float bspline_w2(float a) {
  const float poly = -3.0f * a * a * a + 3.0f * a * a + 3.0f * a + 1.0f;
  return (1.0f / 6.0f) * poly;
}
|
||
|
|
||
|
// Cubic B-spline basis function w3:
//   w3(a) = a^3 / 6
__host__ __device__ float bspline_w3(float a) {
  const float cube = a * a * a;
  return (1.0f / 6.0f) * cube;
}
|
||
|
|
||
|
void computeFPS() {
|
||
|
frameCount++;
|
||
|
fpsCount++;
|
||
|
|
||
|
if (fpsCount == fpsLimit - 1) {
|
||
|
g_Verify = true;
|
||
|
}
|
||
|
|
||
|
if (fpsCount == fpsLimit) {
|
||
|
char fps[256];
|
||
|
float ifps = 1.f / (sdkGetAverageTimerValue(&timer) / 1000.f);
|
||
|
sprintf(fps, "%s %s <%s>: %3.1f fps", "", sSDKsample,
|
||
|
sFilterMode[g_FilterMode], ifps);
|
||
|
|
||
|
glutSetWindowTitle(fps);
|
||
|
fpsCount = 0;
|
||
|
|
||
|
sdkResetTimer(&timer);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void plotCurve(float (*func)(float)) {
|
||
|
const int steps = 100;
|
||
|
glBegin(GL_LINE_STRIP);
|
||
|
|
||
|
for (int i = 0; i < steps; i++) {
|
||
|
float x = i / (float)(steps - 1);
|
||
|
glVertex2f(x, func(x));
|
||
|
}
|
||
|
|
||
|
glEnd();
|
||
|
}
|
||
|
|
||
|
// display results using OpenGL (called by GLUT)
|
||
|
void display() {
|
||
|
sdkStartTimer(&timer);
|
||
|
|
||
|
// map PBO to get CUDA device pointer
|
||
|
uchar4 *d_output;
|
||
|
checkCudaErrors(cudaGraphicsMapResources(1, &cuda_pbo_resource, 0));
|
||
|
size_t num_bytes;
|
||
|
checkCudaErrors(cudaGraphicsResourceGetMappedPointer(
|
||
|
(void **)&d_output, &num_bytes, cuda_pbo_resource));
|
||
|
render(imageWidth, imageHeight, tx, ty, scale, cx, cy, blockSize, gridSize,
|
||
|
g_FilterMode, d_output);
|
||
|
|
||
|
checkCudaErrors(cudaGraphicsUnmapResources(1, &cuda_pbo_resource, 0));
|
||
|
|
||
|
// Common display path
|
||
|
{
|
||
|
// display results
|
||
|
glClear(GL_COLOR_BUFFER_BIT);
|
||
|
|
||
|
#if USE_BUFFER_TEX
|
||
|
// display using buffer texture
|
||
|
glBindTexture(GL_TEXTURE_BUFFER_EXT, bufferTex);
|
||
|
glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, fprog);
|
||
|
glEnable(GL_FRAGMENT_PROGRAM_ARB);
|
||
|
glProgramLocalParameterI4iNV(GL_FRAGMENT_PROGRAM_ARB, 0, width, 0, 0, 0);
|
||
|
#else
|
||
|
// download image from PBO to OpenGL texture
|
||
|
glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbo);
|
||
|
glBindTexture(GL_TEXTURE_TYPE, displayTex);
|
||
|
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
|
||
|
glTexSubImage2D(GL_TEXTURE_TYPE, 0, 0, 0, width, height, GL_BGRA,
|
||
|
GL_UNSIGNED_BYTE, 0);
|
||
|
glEnable(GL_TEXTURE_TYPE);
|
||
|
#endif
|
||
|
|
||
|
// draw textured quad
|
||
|
glDisable(GL_DEPTH_TEST);
|
||
|
glBegin(GL_QUADS);
|
||
|
glTexCoord2f(0.0f, (GLfloat)height);
|
||
|
glVertex2f(0.0f, 0.0f);
|
||
|
glTexCoord2f((GLfloat)width, (GLfloat)height);
|
||
|
glVertex2f(1.0f, 0.0f);
|
||
|
glTexCoord2f((GLfloat)width, 0.0f);
|
||
|
glVertex2f(1.0f, 1.0f);
|
||
|
glTexCoord2f(0.0f, 0.0f);
|
||
|
glVertex2f(0.0f, 1.0f);
|
||
|
glEnd();
|
||
|
glDisable(GL_TEXTURE_TYPE);
|
||
|
glDisable(GL_FRAGMENT_PROGRAM_ARB);
|
||
|
|
||
|
glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
|
||
|
|
||
|
if (drawCurves) {
|
||
|
// draw spline curves
|
||
|
glPushMatrix();
|
||
|
glScalef(0.25, 0.25, 1.0);
|
||
|
|
||
|
glTranslatef(0.0, 2.0, 0.0);
|
||
|
glColor3f(1.0, 0.0, 0.0);
|
||
|
plotCurve(bspline_w3);
|
||
|
|
||
|
glTranslatef(1.0, 0.0, 0.0);
|
||
|
glColor3f(0.0, 1.0, 0.0);
|
||
|
plotCurve(bspline_w2);
|
||
|
|
||
|
glTranslatef(1.0, 0.0, 0.0);
|
||
|
glColor3f(0.0, 0.0, 1.0);
|
||
|
plotCurve(bspline_w1);
|
||
|
|
||
|
glTranslatef(1.0, 0.0, 0.0);
|
||
|
glColor3f(1.0, 0.0, 1.0);
|
||
|
plotCurve(bspline_w0);
|
||
|
|
||
|
glPopMatrix();
|
||
|
glColor3f(1.0, 1.0, 1.0);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
glutSwapBuffers();
|
||
|
glutReportErrors();
|
||
|
|
||
|
sdkStopTimer(&timer);
|
||
|
|
||
|
computeFPS();
|
||
|
}
|
||
|
|
||
|
// GLUT callback functions
|
||
|
void timerEvent(int value) {
|
||
|
if (glutGetWindow()) {
|
||
|
glutPostRedisplay();
|
||
|
glutTimerFunc(REFRESH_DELAY, timerEvent, 0);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void keyboard(unsigned char key, int /*x*/, int /*y*/) {
|
||
|
switch (key) {
|
||
|
case 27:
|
||
|
#if defined(__APPLE__) || defined(MACOSX)
|
||
|
exit(EXIT_SUCCESS);
|
||
|
#else
|
||
|
glutDestroyWindow(glutGetWindow());
|
||
|
return;
|
||
|
#endif
|
||
|
|
||
|
case '1':
|
||
|
g_FilterMode = MODE_NEAREST;
|
||
|
break;
|
||
|
|
||
|
case '2':
|
||
|
g_FilterMode = MODE_BILINEAR;
|
||
|
break;
|
||
|
|
||
|
case '3':
|
||
|
g_FilterMode = MODE_BICUBIC;
|
||
|
break;
|
||
|
|
||
|
case '4':
|
||
|
g_FilterMode = MODE_FAST_BICUBIC;
|
||
|
break;
|
||
|
|
||
|
case '5':
|
||
|
g_FilterMode = MODE_CATMULL_ROM;
|
||
|
break;
|
||
|
|
||
|
case '=':
|
||
|
case '+':
|
||
|
scale *= 0.5f;
|
||
|
break;
|
||
|
|
||
|
case '-':
|
||
|
scale *= 2.0f;
|
||
|
break;
|
||
|
|
||
|
case 'r':
|
||
|
scale = 1.0f;
|
||
|
tx = ty = 0.0f;
|
||
|
break;
|
||
|
|
||
|
case 'd':
|
||
|
printf("%f, %f, %f\n", tx, ty, scale);
|
||
|
break;
|
||
|
|
||
|
case 'b':
|
||
|
runBenchmark(500);
|
||
|
break;
|
||
|
|
||
|
case 'c':
|
||
|
drawCurves ^= 1;
|
||
|
break;
|
||
|
|
||
|
default:
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
if (key >= '1' && key <= '5') {
|
||
|
printf("> FilterMode[%d] = %s\n", g_FilterMode + 1,
|
||
|
sFilterMode[g_FilterMode]);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
int ox, oy;
|
||
|
int buttonState = 0;
|
||
|
|
||
|
void mouse(int button, int state, int x, int y) {
|
||
|
if (state == GLUT_DOWN) {
|
||
|
buttonState |= 1 << button;
|
||
|
} else if (state == GLUT_UP) {
|
||
|
buttonState = 0;
|
||
|
}
|
||
|
|
||
|
ox = x;
|
||
|
oy = y;
|
||
|
}
|
||
|
|
||
|
void motion(int x, int y) {
|
||
|
float dx, dy;
|
||
|
dx = (float)(x - ox);
|
||
|
dy = (float)(y - oy);
|
||
|
|
||
|
if (buttonState & 1) {
|
||
|
// left = translate
|
||
|
tx -= dx * scale;
|
||
|
ty -= dy * scale;
|
||
|
} else if (buttonState & 2) {
|
||
|
// middle = zoom
|
||
|
scale -= dy / 1000.0f;
|
||
|
}
|
||
|
|
||
|
ox = x;
|
||
|
oy = y;
|
||
|
}
|
||
|
|
||
|
// GLUT reshape callback: adopts the new window size as both the window and
// the rendered image size, rebuilds the GL buffers for that size, and resets
// the viewport and matrices to a unit-square orthographic projection.
void reshape(int x, int y) {
  // Output dimensions follow the window.
  imageWidth = width = x;
  imageHeight = height = y;

  initGLBuffers();

  glViewport(0, 0, x, y);

  // Identity model-view.
  glMatrixMode(GL_MODELVIEW);
  glLoadIdentity();

  // Orthographic projection over [0,1] x [0,1].
  glMatrixMode(GL_PROJECTION);
  glLoadIdentity();
  glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
}
|
||
|
|
||
|
// Shutdown hook: releases the CUDA texture and the CUDA registration of the
// PBO, deletes the GL objects created by initGLBuffers/initialize, and
// destroys the FPS timer.
void cleanup() {
  // CUDA-side resources first.
  freeTexture();
  checkCudaErrors(cudaGraphicsUnregisterResource(cuda_pbo_resource));

  // Then the GL objects.
  glDeleteBuffers(1, &pbo);

#if !USE_BUFFER_TEX
  glDeleteTextures(1, &displayTex);
#else
  glDeleteTextures(1, &bufferTex);
  glDeleteProgramsARB(1, &fprog);
#endif

  sdkDeleteTimer(&timer);
}
|
||
|
|
||
|
// Integer division of a by b, with the quotient bumped by one whenever the
// division leaves a remainder.
int iDivUp(int a, int b) {
  const int quotient = a / b;
  if (a % b != 0) {
    return quotient + 1;
  }
  return quotient;
}
|
||
|
|
||
|
void initGLBuffers() {
|
||
|
if (pbo) {
|
||
|
// delete old buffer
|
||
|
checkCudaErrors(cudaGraphicsUnregisterResource(cuda_pbo_resource));
|
||
|
glDeleteBuffers(1, &pbo);
|
||
|
}
|
||
|
|
||
|
// create pixel buffer object for display
|
||
|
glGenBuffers(1, &pbo);
|
||
|
glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbo);
|
||
|
glBufferData(GL_PIXEL_UNPACK_BUFFER_ARB, width * height * sizeof(uchar4), 0,
|
||
|
GL_STREAM_DRAW_ARB);
|
||
|
glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
|
||
|
|
||
|
checkCudaErrors(cudaGraphicsGLRegisterBuffer(
|
||
|
&cuda_pbo_resource, pbo, cudaGraphicsMapFlagsWriteDiscard));
|
||
|
|
||
|
#if USE_BUFFER_TEX
|
||
|
|
||
|
// create buffer texture, attach to pbo
|
||
|
if (bufferTex) {
|
||
|
glDeleteTextures(1, &bufferTex);
|
||
|
}
|
||
|
|
||
|
glGenTextures(1, &bufferTex);
|
||
|
glBindTexture(GL_TEXTURE_BUFFER_EXT, bufferTex);
|
||
|
glTexBufferEXT(GL_TEXTURE_BUFFER_EXT, GL_RGBA8, pbo);
|
||
|
glBindTexture(GL_TEXTURE_BUFFER_EXT, 0);
|
||
|
#else
|
||
|
|
||
|
// create texture for display
|
||
|
if (displayTex) {
|
||
|
glDeleteTextures(1, &displayTex);
|
||
|
}
|
||
|
|
||
|
glGenTextures(1, &displayTex);
|
||
|
glBindTexture(GL_TEXTURE_TYPE, displayTex);
|
||
|
glTexImage2D(GL_TEXTURE_TYPE, 0, GL_RGBA8, width, height, 0, GL_RGBA,
|
||
|
GL_UNSIGNED_BYTE, NULL);
|
||
|
glTexParameteri(GL_TEXTURE_TYPE, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
|
||
|
glTexParameteri(GL_TEXTURE_TYPE, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
|
||
|
glBindTexture(GL_TEXTURE_TYPE, 0);
|
||
|
#endif
|
||
|
|
||
|
// calculate new grid size
|
||
|
gridSize = dim3(iDivUp(width, blockSize.x), iDivUp(height, blockSize.y));
|
||
|
}
|
||
|
|
||
|
// Popup-menu callback: each menu entry carries the key code of the matching
// keyboard shortcut, so the selection is simply forwarded to keyboard().
void mainMenu(int i) {
  const unsigned char key = i;
  keyboard(key, 0, 0);
}
|
||
|
|
||
|
// Builds the popup menu and attaches it to the right mouse button. Every
// entry's value is the key code handled by keyboard().
void initMenus() {
  static const struct {
    const char *label;
    int key;
  } entries[] = {
      {"Nearest [1]", '1'},      {"Bilinear [2]", '2'},
      {"Bicubic [3]", '3'},      {"Fast Bicubic [4]", '4'},
      {"Catmull-Rom [5]", '5'},  {"Zoom in [=]", '='},
      {"Zoom out [-]", '-'},     {"Benchmark [b]", 'b'},
      {"DrawCurves [c]", 'c'},   {"Quit [esc]", 27},
  };
  const size_t entryCount = sizeof(entries) / sizeof(entries[0]);

  glutCreateMenu(mainMenu);

  for (size_t e = 0; e < entryCount; ++e) {
    glutAddMenuEntry(entries[e].label, entries[e].key);
  }

  glutAttachMenu(GLUT_RIGHT_BUTTON);
}
|
||
|
|
||
|
void runBenchmark(int iterations) {
|
||
|
printf("[%s] (Benchmark Mode)\n", sSDKsample);
|
||
|
|
||
|
sdkCreateTimer(&timer);
|
||
|
|
||
|
uchar4 *d_output;
|
||
|
checkCudaErrors(cudaGraphicsMapResources(1, &cuda_pbo_resource, 0));
|
||
|
size_t num_bytes;
|
||
|
checkCudaErrors(cudaGraphicsResourceGetMappedPointer(
|
||
|
(void **)&d_output, &num_bytes, cuda_pbo_resource));
|
||
|
|
||
|
sdkStartTimer(&timer);
|
||
|
|
||
|
for (int i = 0; i < iterations; ++i) {
|
||
|
render(imageWidth, imageHeight, tx, ty, scale, cx, cy, blockSize, gridSize,
|
||
|
g_FilterMode, d_output);
|
||
|
}
|
||
|
|
||
|
cudaDeviceSynchronize();
|
||
|
sdkStopTimer(&timer);
|
||
|
float time = sdkGetTimerValue(&timer) / (float)iterations;
|
||
|
|
||
|
checkCudaErrors(cudaGraphicsUnmapResources(1, &cuda_pbo_resource, 0));
|
||
|
|
||
|
printf("time: %0.3f ms, %f Mpixels/sec\n", time,
|
||
|
(width * height / (time * 0.001f)) / 1e6);
|
||
|
}
|
||
|
|
||
|
void runAutoTest(int argc, char **argv, const char *dump_filename,
|
||
|
eFilterMode filter_mode) {
|
||
|
cudaDeviceProp deviceProps;
|
||
|
|
||
|
int devID = findCudaDevice(argc, (const char **)argv);
|
||
|
|
||
|
checkCudaErrors(cudaGetDeviceProperties(&deviceProps, devID));
|
||
|
|
||
|
printf("[%s] (automated testing w/ readback)\n", sSDKsample);
|
||
|
printf("CUDA device [%s] has %d Multi-Processors\n", deviceProps.name,
|
||
|
deviceProps.multiProcessorCount);
|
||
|
|
||
|
loadImageData(argc, argv);
|
||
|
|
||
|
uchar4 *d_output;
|
||
|
checkCudaErrors(cudaMalloc((void **)&d_output, imageWidth * imageHeight * 4));
|
||
|
unsigned int *h_result =
|
||
|
(unsigned int *)malloc(width * height * sizeof(unsigned int));
|
||
|
|
||
|
printf("AutoTest: %s Filter Mode: <%s>\n", sSDKsample,
|
||
|
sFilterMode[g_FilterMode]);
|
||
|
|
||
|
render(imageWidth, imageHeight, tx, ty, scale, cx, cy, blockSize, gridSize,
|
||
|
filter_mode, d_output);
|
||
|
|
||
|
// check if kernel execution generated an error
|
||
|
getLastCudaError("Error: render (bicubicTexture) Kernel execution FAILED");
|
||
|
checkCudaErrors(cudaDeviceSynchronize());
|
||
|
|
||
|
cudaMemcpy(h_result, d_output, imageWidth * imageHeight * 4,
|
||
|
cudaMemcpyDeviceToHost);
|
||
|
|
||
|
sdkSavePPM4ub(dump_filename, (unsigned char *)h_result, imageWidth,
|
||
|
imageHeight);
|
||
|
|
||
|
checkCudaErrors(cudaFree(d_output));
|
||
|
free(h_result);
|
||
|
}
|
||
|
|
||
|
#if USE_BUFFER_TEX
|
||
|
// fragment program for reading from buffer texture
|
||
|
static const char *shaderCode =
|
||
|
"!!NVfp4.0\n"
|
||
|
"INT PARAM width = program.local[0];\n"
|
||
|
"INT TEMP index;\n"
|
||
|
"FLR.S index, fragment.texcoord;\n"
|
||
|
"MAD.S index.x, index.y, width, index.x;\n" // compute 1D index from 2D
|
||
|
// coords
|
||
|
"TXF result.color, index.x, texture[0], BUFFER;\n"
|
||
|
"END";
|
||
|
#endif
|
||
|
|
||
|
// Creates an ARB assembly program of `program_type` from the ASCII source
// `code` and leaves it bound to that target.
//
// Returns the new program id, or 0 if the driver reports a compile error. On
// failure the error position and message are written to stderr and the
// partially created program object is deleted so it does not leak.
GLuint compileASMShader(GLenum program_type, const char *code) {
  GLuint program_id;
  glGenProgramsARB(1, &program_id);
  glBindProgramARB(program_type, program_id);
  glProgramStringARB(program_type, GL_PROGRAM_FORMAT_ASCII_ARB,
                     (GLsizei)strlen(code), (GLubyte *)code);

  // -1 means the program string was accepted.
  GLint error_pos;
  glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &error_pos);

  if (error_pos != -1) {
    const GLubyte *error_string = glGetString(GL_PROGRAM_ERROR_STRING_ARB);
    // glGetString may return NULL; never hand that to %s.
    fprintf(stderr, "Program error at position: %d\n%s\n", (int)error_pos,
            error_string ? (const char *)error_string : "(no error string)");
    glDeleteProgramsARB(1, &program_id);
    return 0;
  }

  return program_id;
}
|
||
|
|
||
|
void initialize(int argc, char **argv) {
|
||
|
printf("[%s] (OpenGL Mode)\n", sSDKsample);
|
||
|
|
||
|
initGL(&argc, argv);
|
||
|
|
||
|
// use command-line specified CUDA device, otherwise use device with highest
|
||
|
// Gflops/s
|
||
|
int devID = findCudaDevice(argc, (const char **)argv);
|
||
|
|
||
|
// get number of SMs on this GPU
|
||
|
cudaDeviceProp deviceProps;
|
||
|
checkCudaErrors(cudaGetDeviceProperties(&deviceProps, devID));
|
||
|
printf("CUDA device [%s] has %d Multi-Processors\n", deviceProps.name,
|
||
|
deviceProps.multiProcessorCount);
|
||
|
|
||
|
// Create the timer (for fps measurement)
|
||
|
sdkCreateTimer(&timer);
|
||
|
|
||
|
// load image from disk
|
||
|
loadImageData(argc, argv);
|
||
|
|
||
|
printf(
|
||
|
"\n"
|
||
|
"\tControls\n"
|
||
|
"\t=/- : Zoom in/out\n"
|
||
|
"\tb : Run Benchmark g_FilterMode\n"
|
||
|
"\tc : Draw Bicubic Spline Curve\n"
|
||
|
"\t[esc] - Quit\n\n"
|
||
|
|
||
|
"\tPress number keys to change filtering g_FilterMode:\n\n"
|
||
|
"\t1 : nearest filtering\n"
|
||
|
"\t2 : bilinear filtering\n"
|
||
|
"\t3 : bicubic filtering\n"
|
||
|
"\t4 : fast bicubic filtering\n"
|
||
|
"\t5 : Catmull-Rom filtering\n\n");
|
||
|
|
||
|
initGLBuffers();
|
||
|
|
||
|
#if USE_BUFFER_TEX
|
||
|
fprog = compileASMShader(GL_FRAGMENT_PROGRAM_ARB, shaderCode);
|
||
|
|
||
|
if (!fprog) {
|
||
|
exit(EXIT_SUCCESS);
|
||
|
}
|
||
|
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
void initGL(int *argc, char **argv) {
|
||
|
// initialize GLUT callback functions
|
||
|
glutInit(argc, argv);
|
||
|
glutInitDisplayMode(GLUT_RGBA | GLUT_ALPHA | GLUT_DOUBLE | GLUT_DEPTH);
|
||
|
glutInitWindowSize(width, height);
|
||
|
glutCreateWindow("CUDA bicubic texture filtering");
|
||
|
glutDisplayFunc(display);
|
||
|
glutKeyboardFunc(keyboard);
|
||
|
glutMouseFunc(mouse);
|
||
|
glutMotionFunc(motion);
|
||
|
glutReshapeFunc(reshape);
|
||
|
glutTimerFunc(REFRESH_DELAY, timerEvent, 0);
|
||
|
|
||
|
#if defined(__APPLE__) || defined(MACOSX)
|
||
|
atexit(cleanup);
|
||
|
#else
|
||
|
glutCloseFunc(cleanup);
|
||
|
#endif
|
||
|
|
||
|
initMenus();
|
||
|
|
||
|
if (!isGLVersionSupported(2, 0) ||
|
||
|
!areGLExtensionsSupported("GL_ARB_pixel_buffer_object")) {
|
||
|
fprintf(stderr, "Required OpenGL extensions are missing.");
|
||
|
exit(EXIT_FAILURE);
|
||
|
}
|
||
|
|
||
|
#if USE_BUFFER_TEX
|
||
|
|
||
|
if (!areGLExtensionsSupported("GL_EXT_texture_buffer_object")) {
|
||
|
fprintf(stderr,
|
||
|
"OpenGL extension: GL_EXT_texture_buffer_object missing.\n");
|
||
|
exit(EXIT_FAILURE);
|
||
|
}
|
||
|
|
||
|
if (!areGLExtensionsSupported("GL_NV_gpu_program4")) {
|
||
|
fprintf(stderr, "OpenGL extension: GL_NV_gpu_program4 missing.\n");
|
||
|
exit(EXIT_FAILURE);
|
||
|
}
|
||
|
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
void loadImageData(int argc, char **argv) {
|
||
|
// load image from disk
|
||
|
uchar *h_data = NULL;
|
||
|
char *srcImagePath = NULL;
|
||
|
|
||
|
if ((srcImagePath = sdkFindFilePath(srcImageFilename, argv[0])) == NULL) {
|
||
|
printf("bicubicTexture loadImageData() could not find <%s>\nExiting...\n",
|
||
|
srcImageFilename);
|
||
|
exit(EXIT_FAILURE);
|
||
|
}
|
||
|
|
||
|
sdkLoadPGM<unsigned char>(srcImagePath, &h_data, &imageWidth, &imageHeight);
|
||
|
|
||
|
printf("Loaded '%s', %d x %d pixels\n", srcImageFilename, imageWidth,
|
||
|
imageHeight);
|
||
|
|
||
|
cx = imageWidth * 0.5f;
|
||
|
cy = imageHeight * 0.5f;
|
||
|
|
||
|
// initialize texture
|
||
|
initTexture(imageWidth, imageHeight, h_data);
|
||
|
}
|
||
|
|
||
|
// Prints the command-line usage summary to stdout.
void printHelp() {
  printf("bicubicTexture Usage:\n");
  printf("\t-file=output.ppm (output file to save to disk)\n");
  // The list of modes is closed with ")" so the parentheses balance.
  printf(
      "\t-mode=n (0=Nearest, 1=Bilinear, 2=Bicubic, 3=Fast-Bicubic, "
      "4=Catmull-Rom)\n");
}
|
||
|
|
||
|
////////////////////////////////////////////////////////////////////////////////
|
||
|
// Program main
|
||
|
////////////////////////////////////////////////////////////////////////////////
|
||
|
int main(int argc, char **argv) {
|
||
|
pArgc = &argc;
|
||
|
pArgv = argv;
|
||
|
|
||
|
// parse arguments
|
||
|
char *filename;
|
||
|
|
||
|
#if defined(__linux__)
|
||
|
setenv("DISPLAY", ":0", 0);
|
||
|
#endif
|
||
|
|
||
|
printf("Starting bicubicTexture\n");
|
||
|
|
||
|
if (checkCmdLineFlag(argc, (const char **)argv, "help")) {
|
||
|
printHelp();
|
||
|
exit(EXIT_SUCCESS);
|
||
|
}
|
||
|
|
||
|
if (checkCmdLineFlag(argc, (const char **)argv, "mode")) {
|
||
|
g_FilterMode =
|
||
|
(eFilterMode)getCmdLineArgumentInt(argc, (const char **)argv, "mode");
|
||
|
|
||
|
if (g_FilterMode < 0 || g_FilterMode >= NUM_MODES) {
|
||
|
printf("Invalid Mode setting %d\n", g_FilterMode);
|
||
|
exit(EXIT_FAILURE);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (getCmdLineArgumentString(argc, (const char **)argv, "file", &filename)) {
|
||
|
dumpFilename = filename;
|
||
|
fpsLimit = frameCheckNumber;
|
||
|
|
||
|
// Running CUDA kernel (bicubicFiltering) without visualization (QA
|
||
|
// Testing/Verification)
|
||
|
runAutoTest(argc, argv, (const char *)dumpFilename, g_FilterMode);
|
||
|
} else {
|
||
|
// This runs the CUDA kernel (bicubicFiltering) + OpenGL visualization
|
||
|
initialize(argc, argv);
|
||
|
glutMainLoop();
|
||
|
}
|
||
|
|
||
|
exit(EXIT_SUCCESS);
|
||
|
}
|