mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2025-01-20 03:45:54 +08:00
587 lines
17 KiB
Plaintext
587 lines
17 KiB
Plaintext
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* * Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* * Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
|
* contributors may be used to endorse or promote products derived
|
|
* from this software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
|
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
|
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
/*
    This example demonstrates how to use the CUDA OpenGL bindings to
    dynamically modify a vertex buffer using a CUDA kernel.

    The steps are:
    1. Create an empty vertex buffer object (VBO)
    2. Register the VBO with CUDA
    3. Map the VBO for writing from CUDA
    4. Run a CUDA kernel to modify the vertex positions
    5. Unmap the VBO
    6. Render the results using OpenGL

    Host code
*/
|
|
|
|
// includes, system
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <math.h>
|
|
|
|
#ifdef _WIN32
|
|
# define WINDOWS_LEAN_AND_MEAN
|
|
# define NOMINMAX
|
|
# include <windows.h>
|
|
#endif
|
|
|
|
// OpenGL Graphics includes
|
|
#include <helper_gl.h>
|
|
#if defined (__APPLE__) || defined(MACOSX)
|
|
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
|
|
#include <GLUT/glut.h>
|
|
#ifndef glutCloseFunc
|
|
#define glutCloseFunc glutWMCloseFunc
|
|
#endif
|
|
#else
|
|
#include <GL/freeglut.h>
|
|
#endif
|
|
|
|
// includes, cuda
|
|
#include <cuda_runtime.h>
|
|
#include <cuda_gl_interop.h>
|
|
|
|
// Utilities and timing functions
|
|
#include <helper_functions.h> // includes cuda.h and cuda_runtime_api.h
|
|
|
|
// CUDA helper functions
|
|
#include <helper_cuda.h> // helper functions for CUDA error check
|
|
|
|
#include <vector_types.h>
|
|
|
|
// Tolerances handed to sdkCompareBin2BinFloat() in runAutoTest()
#define MAX_EPSILON_ERROR 10.0f
#define THRESHOLD         0.30f

// Delay between two redraw timer callbacks, in milliseconds
#define REFRESH_DELAY     10

////////////////////////////////////////////////////////////////////////////////
// constants

// Window size requested from GLUT and used for the viewport
const unsigned int window_width  = 512;
const unsigned int window_height = 512;

// Number of mesh vertices along each axis; the VBO holds one float4 per vertex
const unsigned int mesh_width    = 256;
const unsigned int mesh_height   = 256;

// vbo variables
GLuint vbo;                                       // GL buffer object rendered by display()
struct cudaGraphicsResource *cuda_vbo_resource;   // CUDA registration handle for vbo
void *d_vbo_buffer = NULL;                        // plain device buffer used in file-compare mode

// Animation phase passed to the kernel as its `time` argument
float g_fAnim = 0.0;

// mouse controls
int mouse_old_x;
int mouse_old_y;
int mouse_buttons = 0;
float rotate_x = 0.0;
float rotate_y = 0.0;
float translate_z = -3.0;

// Frame timer started and stopped around each display() call
StopWatchInterface *timer = NULL;

// Auto-Verification Code
int fpsCount = 0;                 // frames counted since the last FPS update
int fpsLimit = 1;                 // number of frames between two FPS updates
int g_Index = 0;
float avgFPS = 0.0f;              // most recent frames-per-second estimate
unsigned int frameCount = 0;      // total frames rendered
unsigned int g_TotalErrors = 0;   // non-zero makes main() report failure
bool g_bQAReadback = false;

// Command line as received by main()
int *pArgc = NULL;
char **pArgv = NULL;
|
|
|
|
// Larger of two values. Arguments are parenthesized so that expressions with
// low-precedence operators (e.g. `x & mask`) expand correctly. As with any
// function-like macro, the selected argument is evaluated twice.
#define MAX(a,b) (((a) > (b)) ? (a) : (b))
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
// forward declarations

// Test driver and shutdown hook
bool runTest(int argc, char **argv, char *ref_file);
void cleanup();

// OpenGL setup and vertex buffer management
bool initGL(int *argc, char **argv);
void createVBO(GLuint *vbo, struct cudaGraphicsResource **vbo_res, unsigned int vbo_res_flags);
void deleteVBO(GLuint *vbo, struct cudaGraphicsResource *vbo_res);

// GLUT callbacks
void display();
void keyboard(unsigned char key, int x, int y);
void mouse(int button, int state, int x, int y);
void motion(int x, int y);
void timerEvent(int value);

// CUDA side of the sample
void runCuda(struct cudaGraphicsResource **vbo_resource);
void runAutoTest(int devID, char **argv, char *ref_file);
void checkResultCuda(int argc, char **argv, const GLuint &vbo);

// Name printed in the start and completion messages
const char *sSDKsample = "simpleGL (VBO)";
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
//! Simple kernel to modify vertex positions in sine wave pattern
//!
//! Expects a 2D launch in which thread (x, y) owns mesh vertex (x, y).
//! Threads that fall outside the width x height mesh return without writing,
//! so the grid may be rounded up to whole blocks.
//!
//! @param pos     output vertex array, one float4 per vertex (width*height)
//! @param width   number of mesh vertices along x
//! @param height  number of mesh vertices along y
//! @param time    phase added to both wave arguments
///////////////////////////////////////////////////////////////////////////////
__global__ void simple_vbo_kernel(float4 *pos, unsigned int width, unsigned int height, float time)
{
    unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
    unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;

    // the grid is rounded up to whole blocks, so guard the mesh edges
    if (x >= width || y >= height)
    {
        return;
    }

    // calculate uv coordinates, remapped from [0, 1) to [-1, 1)
    float u = x / (float) width;
    float v = y / (float) height;
    u = u*2.0f - 1.0f;
    v = v*2.0f - 1.0f;

    // calculate simple sine wave pattern
    float freq = 4.0f;
    float w = sinf(u*freq + time) * cosf(v*freq + time) * 0.5f;

    // write output vertex (index computed in size_t to avoid 32-bit wrap)
    pos[(size_t)y*width + x] = make_float4(u, w, v, 1.0f);
}

///////////////////////////////////////////////////////////////////////////////
//! Launch simple_vbo_kernel over the whole mesh using 8x8 thread blocks
//!
//! The grid is sized with a ceiling division so meshes whose dimensions are
//! not multiples of 8 are still covered completely. Launch-configuration
//! errors are reported through getLastCudaError().
//!
//! @param pos          output vertex array, mesh_width*mesh_height float4s
//! @param mesh_width   number of mesh vertices along x
//! @param mesh_height  number of mesh vertices along y
//! @param time         phase forwarded to the kernel
///////////////////////////////////////////////////////////////////////////////
void launch_kernel(float4 *pos, unsigned int mesh_width,
                   unsigned int mesh_height, float time)
{
    // an empty mesh needs no work, and a zero-sized grid is not a valid launch
    if (mesh_width == 0 || mesh_height == 0)
    {
        return;
    }

    // execute the kernel
    dim3 block(8, 8, 1);
    dim3 grid((mesh_width  + block.x - 1) / block.x,
              (mesh_height + block.y - 1) / block.y, 1);
    simple_vbo_kernel<<< grid, block>>>(pos, mesh_width, mesh_height, time);

    // kernel launches return no status; pick up configuration errors here
    getLastCudaError("simple_vbo_kernel launch failed");
}
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
// Program main
//
// Parses the optional "file" command line argument, runs the sample through
// runTest() and exits with EXIT_SUCCESS only when no error was recorded in
// g_TotalErrors. A `false` return from runTest() (e.g. OpenGL initialization
// failed) is counted as an error so the process does not report "OK".
////////////////////////////////////////////////////////////////////////////////
int main(int argc, char **argv)
{
    char *ref_file = NULL;

    pArgc = &argc;
    pArgv = argv;

#if defined(__linux__)
    // only sets DISPLAY when it is not already present (overwrite == 0)
    setenv ("DISPLAY", ":0", 0);
#endif

    printf("%s starting...\n", sSDKsample);

    if (argc > 1)
    {
        if (checkCmdLineFlag(argc, (const char **)argv, "file"))
        {
            // In this mode, we are running non-OpenGL and doing a compare of the VBO was generated correctly
            getCmdLineArgumentString(argc, (const char **)argv, "file", (char **)&ref_file);
        }
    }

    printf("\n");

    // a failed run must not be reported as success
    if (!runTest(argc, argv, ref_file))
    {
        g_TotalErrors++;
    }

    printf("%s completed, returned %s\n", sSDKsample, (g_TotalErrors == 0) ? "OK" : "ERROR!");
    exit(g_TotalErrors == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
//! Update the frame counters and show the current frame rate in the window title
//!
//! Every fpsLimit frames the average timer value (in milliseconds) is turned
//! into frames per second, the timer is reset, and fpsLimit is set to the new
//! rate (at least 1).
////////////////////////////////////////////////////////////////////////////////
void computeFPS()
{
    frameCount++;
    fpsCount++;

    if (fpsCount == fpsLimit)
    {
        const float avgMs = sdkGetAverageTimerValue(&timer);

        // keep the previous estimate when no time was measured: dividing by
        // zero would give infinity, and casting that to int below is undefined
        if (avgMs > 0.0f)
        {
            avgFPS = 1.f / (avgMs / 1000.f);
        }

        fpsCount = 0;
        fpsLimit = (int)MAX(avgFPS, 1.f);

        sdkResetTimer(&timer);
    }

    char fps[256];
    snprintf(fps, sizeof(fps), "Cuda GL Interop (VBO): %3.1f fps (Max 100Hz)", avgFPS);
    glutSetWindowTitle(fps);
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
//! Initialize GL
//!
//! Creates the GLUT window, installs the display/keyboard/motion/timer
//! callbacks, verifies the OpenGL version and sets up clear color, viewport
//! and projection matrix.
//!
//! @param argc  pointer to the argument count, forwarded to glutInit
//! @param argv  argument vector, forwarded to glutInit
//! @return false when isGLVersionSupported(2,0) fails, true otherwise
////////////////////////////////////////////////////////////////////////////////
bool initGL(int *argc, char **argv)
{
    // window and context
    glutInit(argc, argv);
    glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE);
    glutInitWindowSize(window_width, window_height);
    glutCreateWindow("Cuda GL Interop (VBO)");

    // callbacks
    glutDisplayFunc(display);
    glutKeyboardFunc(keyboard);
    glutMotionFunc(motion);
    glutTimerFunc(REFRESH_DELAY, timerEvent,0);

    // the sample cannot run without the required OpenGL version
    if (isGLVersionSupported(2,0) == 0)
    {
        fprintf(stderr, "ERROR: Support for necessary OpenGL extensions missing.");
        fflush(stderr);
        return false;
    }

    // fixed render state: black background, no depth testing
    glClearColor(0.0, 0.0, 0.0, 1.0);
    glDisable(GL_DEPTH_TEST);

    // the viewport covers the whole window
    glViewport(0, 0, window_width, window_height);

    // perspective projection with a 60 degree field of view
    const GLfloat aspect = (GLfloat)window_width / (GLfloat)window_height;
    glMatrixMode(GL_PROJECTION);
    glLoadIdentity();
    gluPerspective(60.0, aspect, 0.1, 10.0);

    SDK_CHECK_ERROR_GL();

    return true;
}
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
//! Run a simple test for CUDA
//!
//! With a reference file the kernel writes into a plain device buffer and the
//! result is compared by runAutoTest(); no OpenGL is used. Without one, the
//! GL window and VBO are created and the GLUT main loop is entered.
//!
//! @param argc      argument count from main
//! @param argv      argument vector from main
//! @param ref_file  reference file name, or NULL for interactive mode
//! @return false when OpenGL initialization fails, true otherwise
////////////////////////////////////////////////////////////////////////////////
bool runTest(int argc, char **argv, char *ref_file)
{
    // Create the CUTIL timer
    sdkCreateTimer(&timer);

    // use command-line specified CUDA device, otherwise use device with highest Gflops/s
    int devID = findCudaDevice(argc, (const char **)argv);

    // command line mode only
    if (ref_file != NULL)
    {
        // create VBO (a device buffer of one float4 per mesh vertex)
        checkCudaErrors(cudaMalloc((void **)&d_vbo_buffer, mesh_width*mesh_height*4*sizeof(float)));

        // run the cuda part
        runAutoTest(devID, argv, ref_file);

        // check result of Cuda step
        checkResultCuda(argc, argv, vbo);

        checkCudaErrors(cudaFree(d_vbo_buffer));
        d_vbo_buffer = NULL;

        // cleanup() is never registered in this mode, so release the timer here
        sdkDeleteTimer(&timer);
    }
    else
    {
        // First initialize OpenGL context, so we can properly set the GL for CUDA.
        // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop.
        if (false == initGL(&argc, argv))
        {
            // cleanup() has not been registered yet; do not leak the timer
            sdkDeleteTimer(&timer);
            return false;
        }

        // register callbacks
        glutDisplayFunc(display);
        glutKeyboardFunc(keyboard);
        glutMouseFunc(mouse);
        glutMotionFunc(motion);
#if defined (__APPLE__) || defined(MACOSX)
        atexit(cleanup);
#else
        glutCloseFunc(cleanup);
#endif

        // create VBO
        createVBO(&vbo, &cuda_vbo_resource, cudaGraphicsMapFlagsWriteDiscard);

        // run the cuda part
        runCuda(&cuda_vbo_resource);

        // start rendering mainloop
        glutMainLoop();
    }

    return true;
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
//! Regenerate the vertex positions of a registered VBO with the CUDA kernel
//!
//! Maps the graphics resource, launches the kernel on the mapped pointer with
//! the current animation phase (g_fAnim), and unmaps the resource again.
//!
//! @param vbo_resource  address of the CUDA graphics resource registered for
//!                      the VBO
////////////////////////////////////////////////////////////////////////////////
void runCuda(struct cudaGraphicsResource **vbo_resource)
{
    // make the GL buffer accessible to CUDA
    checkCudaErrors(cudaGraphicsMapResources(1, vbo_resource, 0));

    // fetch the device pointer (and size) of the mapped buffer
    float4 *vertices = NULL;
    size_t mappedBytes = 0;
    checkCudaErrors(cudaGraphicsResourceGetMappedPointer((void **)&vertices, &mappedBytes,
                                                         *vbo_resource));

    // fill the buffer with the wave for the current animation phase
    launch_kernel(vertices, mesh_width, mesh_height, g_fAnim);

    // hand the buffer back to OpenGL
    checkCudaErrors(cudaGraphicsUnmapResources(1, vbo_resource, 0));
}
|
|
|
|
// FOPEN(fHandle, filename, mode): open `filename` and store the stream in
// fHandle, using fopen_s on Windows and fopen elsewhere. Only defined here
// when no FOPEN macro exists yet.
#ifdef _WIN32
#ifndef FOPEN
#define FOPEN(fHandle,filename,mode) fopen_s(&fHandle, filename, mode)
#endif
#else
#ifndef FOPEN
#define FOPEN(fHandle,filename,mode) (fHandle = fopen(filename, mode))
#endif
#endif

////////////////////////////////////////////////////////////////////////////////
//! Write a block of memory to a binary file
//!
//! Failures to open or write the file are reported on stderr instead of
//! passing a NULL stream to fwrite.
//!
//! @param data      start of the host memory to write
//! @param bytes     number of bytes to write
//! @param filename  path of the file to create or overwrite
////////////////////////////////////////////////////////////////////////////////
void sdkDumpBin2(void *data, unsigned int bytes, const char *filename)
{
    printf("sdkDumpBin: <%s>\n", filename);

    // start from NULL so a failed open is detectable on every platform
    FILE *fp = NULL;
    FOPEN(fp, filename, "wb");

    if (fp == NULL)
    {
        fprintf(stderr, "sdkDumpBin: unable to open <%s> for writing\n", filename);
        fflush(stderr);
        return;
    }

    // byte-sized items make the return value the number of bytes written
    if (fwrite(data, 1, bytes, fp) != bytes)
    {
        fprintf(stderr, "sdkDumpBin: short write to <%s>\n", filename);
        fflush(stderr);
    }

    fflush(fp);
    fclose(fp);
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
//! Run the kernel on the plain device buffer and compare against a reference
//!
//! Launches the kernel on d_vbo_buffer, copies the result to the host, dumps
//! it to "simpleGL.bin" and compares that file with the reference file.
//! A failed comparison increments g_TotalErrors.
//!
//! @param devID     CUDA device id (not used by this function)
//! @param argv      argument vector; argv[0] is used to locate the reference
//! @param ref_file  name of the reference file to compare against
////////////////////////////////////////////////////////////////////////////////
void runAutoTest(int devID, char **argv, char *ref_file)
{
    // NOTE(review): only mesh_width*mesh_height floats are dumped although the
    // device buffer holds four floats per vertex; kept as is because the
    // reference file presumably has the same layout - confirm before changing.
    const size_t dumpBytes = mesh_width*mesh_height*sizeof(float);

    void *imageData = malloc(dumpBytes);

    if (imageData == NULL)
    {
        fprintf(stderr, "runAutoTest: unable to allocate %lu bytes of host memory\n",
                (unsigned long)dumpBytes);
        fflush(stderr);
        g_TotalErrors++;
        return;
    }

    // execute the kernel
    launch_kernel((float4 *)d_vbo_buffer, mesh_width, mesh_height, g_fAnim);

    // wait for the kernel so that execution errors surface here
    checkCudaErrors(cudaDeviceSynchronize());
    getLastCudaError("launch_kernel failed");

    checkCudaErrors(cudaMemcpy(imageData, d_vbo_buffer, dumpBytes, cudaMemcpyDeviceToHost));

    sdkDumpBin2(imageData, dumpBytes, "simpleGL.bin");

    // the host copy is no longer needed once it is on disk
    free(imageData);
    imageData = NULL;

    char *reference_file = sdkFindFilePath(ref_file, argv[0]);

    if (reference_file &&
        !sdkCompareBin2BinFloat("simpleGL.bin", reference_file,
                                mesh_width*mesh_height*sizeof(float),
                                MAX_EPSILON_ERROR, THRESHOLD, pArgv[0]))
    {
        g_TotalErrors++;
    }
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
//! Create VBO
//!
//! Generates a GL buffer sized for one float4 per mesh vertex and registers
//! it with CUDA.
//!
//! @param vbo            receives the name of the new GL buffer; must not be NULL
//! @param vbo_res        receives the CUDA graphics resource for the buffer
//! @param vbo_res_flags  registration flags passed to cudaGraphicsGLRegisterBuffer
////////////////////////////////////////////////////////////////////////////////
void createVBO(GLuint *vbo, struct cudaGraphicsResource **vbo_res,
               unsigned int vbo_res_flags)
{
    assert(vbo);

    // four floats (x, y, z, w) per vertex
    const unsigned int bufferBytes = mesh_width * mesh_height * 4 * sizeof(float);

    // allocate uninitialized storage for the buffer, then leave it unbound
    glGenBuffers(1, vbo);
    glBindBuffer(GL_ARRAY_BUFFER, *vbo);
    glBufferData(GL_ARRAY_BUFFER, bufferBytes, NULL, GL_DYNAMIC_DRAW);
    glBindBuffer(GL_ARRAY_BUFFER, 0);

    // make the buffer known to CUDA
    checkCudaErrors(cudaGraphicsGLRegisterBuffer(vbo_res, *vbo, vbo_res_flags));

    SDK_CHECK_ERROR_GL();
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
//! Delete VBO
//!
//! Unregisters the buffer from CUDA, deletes the GL buffer object and resets
//! the caller's buffer name to 0.
//!
//! @param vbo      name of the GL buffer to delete; set to 0 on return
//! @param vbo_res  CUDA graphics resource registered for the buffer
////////////////////////////////////////////////////////////////////////////////
void deleteVBO(GLuint *vbo, struct cudaGraphicsResource *vbo_res)
{
    // unregister this buffer object with CUDA
    checkCudaErrors(cudaGraphicsUnregisterResource(vbo_res));

    // the first argument of glBindBuffer is a target enum, not a count
    glBindBuffer(GL_ARRAY_BUFFER, *vbo);
    glDeleteBuffers(1, vbo);

    *vbo = 0;
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
//! Display callback
//!
//! Recomputes the vertex positions with CUDA, draws the mesh as red points
//! using the current mouse-controlled view, advances the animation phase and
//! updates the FPS display. The whole frame is timed with `timer`.
////////////////////////////////////////////////////////////////////////////////
void display()
{
    sdkStartTimer(&timer);

    // let CUDA write this frame's vertex positions into the VBO
    runCuda(&cuda_vbo_resource);

    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);

    // view transform: move the camera back, then apply the mouse rotation
    glMatrixMode(GL_MODELVIEW);
    glLoadIdentity();
    glTranslatef(0.0f, 0.0f, translate_z);
    glRotatef(rotate_x, 1.0f, 0.0f, 0.0f);
    glRotatef(rotate_y, 0.0f, 1.0f, 0.0f);

    // source the vertex array from the VBO: 4 tightly packed floats per vertex
    glBindBuffer(GL_ARRAY_BUFFER, vbo);
    glVertexPointer(4, GL_FLOAT, 0, 0);

    // draw one red point per mesh vertex
    const unsigned int vertexCount = mesh_width * mesh_height;
    glEnableClientState(GL_VERTEX_ARRAY);
    glColor3f(1.0f, 0.0f, 0.0f);
    glDrawArrays(GL_POINTS, 0, vertexCount);
    glDisableClientState(GL_VERTEX_ARRAY);

    glutSwapBuffers();

    // advance the wave for the next frame
    g_fAnim += 0.01f;

    sdkStopTimer(&timer);
    computeFPS();
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
//! Timer callback: request a redraw and re-arm the timer
//!
//! Does nothing once glutGetWindow() reports no current window, which also
//! stops the timer chain.
//!
//! @param value  timer id supplied by GLUT (unused)
////////////////////////////////////////////////////////////////////////////////
void timerEvent(int value)
{
    // no window left: let the timer chain end
    if (!glutGetWindow())
    {
        return;
    }

    glutPostRedisplay();
    glutTimerFunc(REFRESH_DELAY, timerEvent,0);
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
//! Release the frame timer and, if one was created, the VBO
////////////////////////////////////////////////////////////////////////////////
void cleanup()
{
    sdkDeleteTimer(&timer);

    // a zero name means no VBO was created (or it was already deleted)
    if (vbo == 0)
    {
        return;
    }

    deleteVBO(&vbo, cuda_vbo_resource);
}
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
//! Keyboard events handler
//!
//! Key code 27 ends the sample: by exiting the process on Apple platforms and
//! by destroying the current window elsewhere. All other keys are ignored.
//!
//! @param key  character code of the pressed key
////////////////////////////////////////////////////////////////////////////////
void keyboard(unsigned char key, int /*x*/, int /*y*/)
{
    const unsigned char quitKey = 27;

    if (key != quitKey)
    {
        return;
    }

#if defined(__APPLE__) || defined(MACOSX)
    exit(EXIT_SUCCESS);
#else
    glutDestroyWindow(glutGetWindow());
#endif
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
//! Mouse button handler
//!
//! A press sets bit `button` in mouse_buttons; any release clears all bits.
//! The cursor position is remembered for the next motion() call.
//!
//! @param button  index of the button that changed state
//! @param state   GLUT_DOWN or GLUT_UP
//! @param x       cursor x position
//! @param y       cursor y position
////////////////////////////////////////////////////////////////////////////////
void mouse(int button, int state, int x, int y)
{
    switch (state)
    {
        case GLUT_DOWN:
            mouse_buttons |= 1<<button;
            break;

        case GLUT_UP:
            mouse_buttons = 0;
            break;

        default:
            break;
    }

    mouse_old_x = x;
    mouse_old_y = y;
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
//! Mouse drag handler
//!
//! With bit 0 of mouse_buttons set the drag rotates the view; otherwise, with
//! bit 2 set, vertical movement changes translate_z. The cursor position is
//! stored so the next call sees a relative movement.
//!
//! @param x  cursor x position
//! @param y  cursor y position
////////////////////////////////////////////////////////////////////////////////
void motion(int x, int y)
{
    // movement since the last mouse event
    const float deltaX = (float)(x - mouse_old_x);
    const float deltaY = (float)(y - mouse_old_y);

    const bool rotating = (mouse_buttons & 1) != 0;
    const bool zooming  = (mouse_buttons & 4) != 0;

    if (rotating)
    {
        rotate_x += deltaY * 0.2f;
        rotate_y += deltaX * 0.2f;
    }
    else if (zooming)
    {
        translate_z += deltaY * 0.01f;
    }

    mouse_old_x = x;
    mouse_old_y = y;
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
//! Check if the result is correct or write data to file for external
//! regression testing
//!
//! Does nothing while d_vbo_buffer is set. Otherwise the VBO is temporarily
//! unregistered from CUDA, mapped for reading through OpenGL, optionally
//! written to "./data/regression.dat" (when the "regression" flag is given),
//! unmapped, and registered with CUDA again.
//!
//! @param argc  argument count, searched for the "regression" flag
//! @param argv  argument vector
//! @param vbo   name of the GL buffer to read back
////////////////////////////////////////////////////////////////////////////////
void checkResultCuda(int argc, char **argv, const GLuint &vbo)
{
    if (!d_vbo_buffer)
    {
        checkCudaErrors(cudaGraphicsUnregisterResource(cuda_vbo_resource));

        // map buffer object
        glBindBuffer(GL_ARRAY_BUFFER, vbo);
        float *data = (float *) glMapBuffer(GL_ARRAY_BUFFER, GL_READ_ONLY);

        if (data == NULL)
        {
            // nothing was mapped, so there is nothing to write or unmap
            fprintf(stderr, "Map buffer failed.\n");
            fflush(stderr);
        }
        else
        {
            // check result
            if (checkCmdLineFlag(argc, (const char **) argv, "regression"))
            {
                // write file for regression test
                // NOTE(review): writes 3 floats per vertex although the buffer
                // stores 4 - confirm against the regression data consumer
                sdkWriteFile<float>("./data/regression.dat",
                                    data, mesh_width * mesh_height * 3, 0.0, false);
            }

            // unmap GL buffer object
            if (!glUnmapBuffer(GL_ARRAY_BUFFER))
            {
                fprintf(stderr, "Unmap buffer failed.\n");
                fflush(stderr);
            }
        }

        // register again so later CUDA mapping keeps working
        checkCudaErrors(cudaGraphicsGLRegisterBuffer(&cuda_vbo_resource, vbo,
                                                     cudaGraphicsMapFlagsWriteDiscard));

        SDK_CHECK_ERROR_GL();
    }
}
|