cuda-samples/Samples/simpleGL/simpleGL.cu

587 lines
17 KiB
Plaintext
Raw Normal View History

2021-10-21 19:04:49 +08:00
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
This example demonstrates how to use the Cuda OpenGL bindings to
dynamically modify a vertex buffer using a Cuda kernel.
The steps are:
1. Create an empty vertex buffer object (VBO)
2. Register the VBO with Cuda
3. Map the VBO for writing from Cuda
4. Run Cuda kernel to modify the vertex positions
5. Unmap the VBO
6. Render the results using OpenGL
Host code
*/
// includes, system
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#ifdef _WIN32
# define WINDOWS_LEAN_AND_MEAN
# define NOMINMAX
# include <windows.h>
#endif
// OpenGL Graphics includes
#include <helper_gl.h>
#if defined (__APPLE__) || defined(MACOSX)
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
#include <GLUT/glut.h>
#ifndef glutCloseFunc
#define glutCloseFunc glutWMCloseFunc
#endif
#else
#include <GL/freeglut.h>
#endif
// includes, cuda
#include <cuda_runtime.h>
#include <cuda_gl_interop.h>
// Utilities and timing functions
#include <helper_functions.h> // includes cuda.h and cuda_runtime_api.h
// CUDA helper functions
#include <helper_cuda.h> // helper functions for CUDA error check
#include <vector_types.h>
#define MAX_EPSILON_ERROR 10.0f
#define THRESHOLD 0.30f
#define REFRESH_DELAY 10 //ms
////////////////////////////////////////////////////////////////////////////////
// constants
const unsigned int window_width = 512;
const unsigned int window_height = 512;
const unsigned int mesh_width = 256;
const unsigned int mesh_height = 256;
// vbo variables
GLuint vbo;
struct cudaGraphicsResource *cuda_vbo_resource;
void *d_vbo_buffer = NULL;
float g_fAnim = 0.0;
// mouse controls
int mouse_old_x, mouse_old_y;
int mouse_buttons = 0;
float rotate_x = 0.0, rotate_y = 0.0;
float translate_z = -3.0;
StopWatchInterface *timer = NULL;
// Auto-Verification Code
int fpsCount = 0; // FPS count for averaging
int fpsLimit = 1; // FPS limit for sampling
int g_Index = 0;
float avgFPS = 0.0f;
unsigned int frameCount = 0;
unsigned int g_TotalErrors = 0;
bool g_bQAReadback = false;
int *pArgc = NULL;
char **pArgv = NULL;
#define MAX(a,b) ((a > b) ? a : b)
////////////////////////////////////////////////////////////////////////////////
// declaration, forward
bool runTest(int argc, char **argv, char *ref_file);
void cleanup();
// GL functionality
bool initGL(int *argc, char **argv);
void createVBO(GLuint *vbo, struct cudaGraphicsResource **vbo_res,
unsigned int vbo_res_flags);
void deleteVBO(GLuint *vbo, struct cudaGraphicsResource *vbo_res);
// rendering callbacks
void display();
void keyboard(unsigned char key, int x, int y);
void mouse(int button, int state, int x, int y);
void motion(int x, int y);
void timerEvent(int value);
// Cuda functionality
void runCuda(struct cudaGraphicsResource **vbo_resource);
void runAutoTest(int devID, char **argv, char *ref_file);
void checkResultCuda(int argc, char **argv, const GLuint &vbo);
const char *sSDKsample = "simpleGL (VBO)";
///////////////////////////////////////////////////////////////////////////////
//! Simple kernel to modify vertex positions in sine wave pattern
//! @param data data in global memory
///////////////////////////////////////////////////////////////////////////////
__global__ void simple_vbo_kernel(float4 *pos, unsigned int width, unsigned int height, float time)
{
unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
// calculate uv coordinates
float u = x / (float) width;
float v = y / (float) height;
u = u*2.0f - 1.0f;
v = v*2.0f - 1.0f;
// calculate simple sine wave pattern
float freq = 4.0f;
float w = sinf(u*freq + time) * cosf(v*freq + time) * 0.5f;
// write output vertex
pos[y*width+x] = make_float4(u, w, v, 1.0f);
}
void launch_kernel(float4 *pos, unsigned int mesh_width,
unsigned int mesh_height, float time)
{
// execute the kernel
dim3 block(8, 8, 1);
dim3 grid(mesh_width / block.x, mesh_height / block.y, 1);
simple_vbo_kernel<<< grid, block>>>(pos, mesh_width, mesh_height, time);
}
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int main(int argc, char **argv)
{
char *ref_file = NULL;
pArgc = &argc;
pArgv = argv;
#if defined(__linux__)
setenv ("DISPLAY", ":0", 0);
#endif
printf("%s starting...\n", sSDKsample);
if (argc > 1)
{
if (checkCmdLineFlag(argc, (const char **)argv, "file"))
{
// In this mode, we are running non-OpenGL and doing a compare of the VBO was generated correctly
getCmdLineArgumentString(argc, (const char **)argv, "file", (char **)&ref_file);
}
}
printf("\n");
runTest(argc, argv, ref_file);
printf("%s completed, returned %s\n", sSDKsample, (g_TotalErrors == 0) ? "OK" : "ERROR!");
exit(g_TotalErrors == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}
void computeFPS()
{
frameCount++;
fpsCount++;
if (fpsCount == fpsLimit)
{
avgFPS = 1.f / (sdkGetAverageTimerValue(&timer) / 1000.f);
fpsCount = 0;
fpsLimit = (int)MAX(avgFPS, 1.f);
sdkResetTimer(&timer);
}
char fps[256];
sprintf(fps, "Cuda GL Interop (VBO): %3.1f fps (Max 100Hz)", avgFPS);
glutSetWindowTitle(fps);
}
////////////////////////////////////////////////////////////////////////////////
//! Initialize GL
////////////////////////////////////////////////////////////////////////////////
bool initGL(int *argc, char **argv)
{
glutInit(argc, argv);
glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE);
glutInitWindowSize(window_width, window_height);
glutCreateWindow("Cuda GL Interop (VBO)");
glutDisplayFunc(display);
glutKeyboardFunc(keyboard);
glutMotionFunc(motion);
glutTimerFunc(REFRESH_DELAY, timerEvent,0);
// initialize necessary OpenGL extensions
if (! isGLVersionSupported(2,0))
{
fprintf(stderr, "ERROR: Support for necessary OpenGL extensions missing.");
fflush(stderr);
return false;
}
// default initialization
glClearColor(0.0, 0.0, 0.0, 1.0);
glDisable(GL_DEPTH_TEST);
// viewport
glViewport(0, 0, window_width, window_height);
// projection
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
gluPerspective(60.0, (GLfloat)window_width / (GLfloat) window_height, 0.1, 10.0);
SDK_CHECK_ERROR_GL();
return true;
}
////////////////////////////////////////////////////////////////////////////////
//! Run a simple test for CUDA
////////////////////////////////////////////////////////////////////////////////
bool runTest(int argc, char **argv, char *ref_file)
{
// Create the CUTIL timer
sdkCreateTimer(&timer);
// use command-line specified CUDA device, otherwise use device with highest Gflops/s
int devID = findCudaDevice(argc, (const char **)argv);
// command line mode only
if (ref_file != NULL)
{
// create VBO
checkCudaErrors(cudaMalloc((void **)&d_vbo_buffer, mesh_width*mesh_height*4*sizeof(float)));
// run the cuda part
runAutoTest(devID, argv, ref_file);
// check result of Cuda step
checkResultCuda(argc, argv, vbo);
cudaFree(d_vbo_buffer);
d_vbo_buffer = NULL;
}
else
{
// First initialize OpenGL context, so we can properly set the GL for CUDA.
// This is necessary in order to achieve optimal performance with OpenGL/CUDA interop.
if (false == initGL(&argc, argv))
{
return false;
}
// register callbacks
glutDisplayFunc(display);
glutKeyboardFunc(keyboard);
glutMouseFunc(mouse);
glutMotionFunc(motion);
#if defined (__APPLE__) || defined(MACOSX)
atexit(cleanup);
#else
glutCloseFunc(cleanup);
#endif
// create VBO
createVBO(&vbo, &cuda_vbo_resource, cudaGraphicsMapFlagsWriteDiscard);
// run the cuda part
runCuda(&cuda_vbo_resource);
// start rendering mainloop
glutMainLoop();
}
return true;
}
////////////////////////////////////////////////////////////////////////////////
//! Run the Cuda part of the computation
////////////////////////////////////////////////////////////////////////////////
void runCuda(struct cudaGraphicsResource **vbo_resource)
{
// map OpenGL buffer object for writing from CUDA
float4 *dptr;
checkCudaErrors(cudaGraphicsMapResources(1, vbo_resource, 0));
size_t num_bytes;
checkCudaErrors(cudaGraphicsResourceGetMappedPointer((void **)&dptr, &num_bytes,
*vbo_resource));
//printf("CUDA mapped VBO: May access %ld bytes\n", num_bytes);
// execute the kernel
// dim3 block(8, 8, 1);
// dim3 grid(mesh_width / block.x, mesh_height / block.y, 1);
// kernel<<< grid, block>>>(dptr, mesh_width, mesh_height, g_fAnim);
launch_kernel(dptr, mesh_width, mesh_height, g_fAnim);
// unmap buffer object
checkCudaErrors(cudaGraphicsUnmapResources(1, vbo_resource, 0));
}
#ifdef _WIN32
#ifndef FOPEN
#define FOPEN(fHandle,filename,mode) fopen_s(&fHandle, filename, mode)
#endif
#else
#ifndef FOPEN
#define FOPEN(fHandle,filename,mode) (fHandle = fopen(filename, mode))
#endif
#endif
void sdkDumpBin2(void *data, unsigned int bytes, const char *filename)
{
printf("sdkDumpBin: <%s>\n", filename);
FILE *fp;
FOPEN(fp, filename, "wb");
fwrite(data, bytes, 1, fp);
fflush(fp);
fclose(fp);
}
////////////////////////////////////////////////////////////////////////////////
//! Run the Cuda part of the computation
////////////////////////////////////////////////////////////////////////////////
void runAutoTest(int devID, char **argv, char *ref_file)
{
char *reference_file = NULL;
void *imageData = malloc(mesh_width*mesh_height*sizeof(float));
// execute the kernel
launch_kernel((float4 *)d_vbo_buffer, mesh_width, mesh_height, g_fAnim);
cudaDeviceSynchronize();
getLastCudaError("launch_kernel failed");
checkCudaErrors(cudaMemcpy(imageData, d_vbo_buffer, mesh_width*mesh_height*sizeof(float), cudaMemcpyDeviceToHost));
sdkDumpBin2(imageData, mesh_width*mesh_height*sizeof(float), "simpleGL.bin");
reference_file = sdkFindFilePath(ref_file, argv[0]);
if (reference_file &&
!sdkCompareBin2BinFloat("simpleGL.bin", reference_file,
mesh_width*mesh_height*sizeof(float),
MAX_EPSILON_ERROR, THRESHOLD, pArgv[0]))
{
g_TotalErrors++;
}
}
////////////////////////////////////////////////////////////////////////////////
//! Create VBO
////////////////////////////////////////////////////////////////////////////////
void createVBO(GLuint *vbo, struct cudaGraphicsResource **vbo_res,
unsigned int vbo_res_flags)
{
assert(vbo);
// create buffer object
glGenBuffers(1, vbo);
glBindBuffer(GL_ARRAY_BUFFER, *vbo);
// initialize buffer object
unsigned int size = mesh_width * mesh_height * 4 * sizeof(float);
glBufferData(GL_ARRAY_BUFFER, size, 0, GL_DYNAMIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, 0);
// register this buffer object with CUDA
checkCudaErrors(cudaGraphicsGLRegisterBuffer(vbo_res, *vbo, vbo_res_flags));
SDK_CHECK_ERROR_GL();
}
////////////////////////////////////////////////////////////////////////////////
//! Delete VBO
////////////////////////////////////////////////////////////////////////////////
void deleteVBO(GLuint *vbo, struct cudaGraphicsResource *vbo_res)
{
// unregister this buffer object with CUDA
checkCudaErrors(cudaGraphicsUnregisterResource(vbo_res));
glBindBuffer(1, *vbo);
glDeleteBuffers(1, vbo);
*vbo = 0;
}
////////////////////////////////////////////////////////////////////////////////
//! Display callback
////////////////////////////////////////////////////////////////////////////////
void display()
{
sdkStartTimer(&timer);
// run CUDA kernel to generate vertex positions
runCuda(&cuda_vbo_resource);
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
// set view matrix
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();
glTranslatef(0.0, 0.0, translate_z);
glRotatef(rotate_x, 1.0, 0.0, 0.0);
glRotatef(rotate_y, 0.0, 1.0, 0.0);
// render from the vbo
glBindBuffer(GL_ARRAY_BUFFER, vbo);
glVertexPointer(4, GL_FLOAT, 0, 0);
glEnableClientState(GL_VERTEX_ARRAY);
glColor3f(1.0, 0.0, 0.0);
glDrawArrays(GL_POINTS, 0, mesh_width * mesh_height);
glDisableClientState(GL_VERTEX_ARRAY);
glutSwapBuffers();
g_fAnim += 0.01f;
sdkStopTimer(&timer);
computeFPS();
}
void timerEvent(int value)
{
if (glutGetWindow())
{
glutPostRedisplay();
glutTimerFunc(REFRESH_DELAY, timerEvent,0);
}
}
void cleanup()
{
sdkDeleteTimer(&timer);
if (vbo)
{
deleteVBO(&vbo, cuda_vbo_resource);
}
}
////////////////////////////////////////////////////////////////////////////////
//! Keyboard events handler
////////////////////////////////////////////////////////////////////////////////
void keyboard(unsigned char key, int /*x*/, int /*y*/)
{
switch (key)
{
case (27) :
#if defined(__APPLE__) || defined(MACOSX)
exit(EXIT_SUCCESS);
#else
glutDestroyWindow(glutGetWindow());
return;
#endif
}
}
////////////////////////////////////////////////////////////////////////////////
//! Mouse event handlers
////////////////////////////////////////////////////////////////////////////////
void mouse(int button, int state, int x, int y)
{
if (state == GLUT_DOWN)
{
mouse_buttons |= 1<<button;
}
else if (state == GLUT_UP)
{
mouse_buttons = 0;
}
mouse_old_x = x;
mouse_old_y = y;
}
void motion(int x, int y)
{
float dx, dy;
dx = (float)(x - mouse_old_x);
dy = (float)(y - mouse_old_y);
if (mouse_buttons & 1)
{
rotate_x += dy * 0.2f;
rotate_y += dx * 0.2f;
}
else if (mouse_buttons & 4)
{
translate_z += dy * 0.01f;
}
mouse_old_x = x;
mouse_old_y = y;
}
////////////////////////////////////////////////////////////////////////////////
//! Check if the result is correct or write data to file for external
//! regression testing
////////////////////////////////////////////////////////////////////////////////
void checkResultCuda(int argc, char **argv, const GLuint &vbo)
{
if (!d_vbo_buffer)
{
checkCudaErrors(cudaGraphicsUnregisterResource(cuda_vbo_resource));
// map buffer object
glBindBuffer(GL_ARRAY_BUFFER, vbo);
float *data = (float *) glMapBuffer(GL_ARRAY_BUFFER, GL_READ_ONLY);
// check result
if (checkCmdLineFlag(argc, (const char **) argv, "regression"))
{
// write file for regression test
sdkWriteFile<float>("./data/regression.dat",
data, mesh_width * mesh_height * 3, 0.0, false);
}
// unmap GL buffer object
if (!glUnmapBuffer(GL_ARRAY_BUFFER))
{
fprintf(stderr, "Unmap buffer failed.\n");
fflush(stderr);
}
checkCudaErrors(cudaGraphicsGLRegisterBuffer(&cuda_vbo_resource, vbo,
cudaGraphicsMapFlagsWriteDiscard));
SDK_CHECK_ERROR_GL();
}
}