/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* This example demonstrates how to use the Cuda OpenGL bindings to dynamically modify a vertex buffer using a Cuda kernel. The steps are: 1. Create an empty vertex buffer object (VBO) 2. Register the VBO with Cuda 3. Map the VBO for writing from Cuda 4. Run Cuda kernel to modify the vertex positions 5. Unmap the VBO 6. Render the results using OpenGL Host code */ // includes, system #include #include #include #include #ifdef _WIN32 # define WINDOWS_LEAN_AND_MEAN # define NOMINMAX # include #endif // OpenGL Graphics includes #include #if defined (__APPLE__) || defined(MACOSX) #pragma clang diagnostic ignored "-Wdeprecated-declarations" #include #ifndef glutCloseFunc #define glutCloseFunc glutWMCloseFunc #endif #else #include #endif // includes, cuda #include #include // Utilities and timing functions #include // includes cuda.h and cuda_runtime_api.h // CUDA helper functions #include // helper functions for CUDA error check #include #define MAX_EPSILON_ERROR 10.0f #define THRESHOLD 0.30f #define REFRESH_DELAY 10 //ms //////////////////////////////////////////////////////////////////////////////// // constants const unsigned int window_width = 512; const unsigned int window_height = 512; const unsigned int mesh_width = 256; const unsigned int mesh_height = 256; // vbo variables GLuint vbo; struct cudaGraphicsResource *cuda_vbo_resource; void *d_vbo_buffer = NULL; float g_fAnim = 0.0; // mouse controls int mouse_old_x, mouse_old_y; int mouse_buttons = 0; float rotate_x = 0.0, rotate_y = 0.0; float translate_z = -3.0; StopWatchInterface *timer = NULL; // Auto-Verification Code int fpsCount = 0; // FPS count for averaging int fpsLimit = 1; // FPS limit for sampling int g_Index = 0; float avgFPS = 0.0f; unsigned int frameCount = 0; unsigned int g_TotalErrors = 0; bool g_bQAReadback = false; int *pArgc = NULL; char **pArgv = NULL; #define MAX(a,b) ((a > b) ? a : b) //////////////////////////////////////////////////////////////////////////////// // declaration, forward bool runTest(int argc, char **argv, char *ref_file); void cleanup(); // GL functionality bool initGL(int *argc, char **argv); void createVBO(GLuint *vbo, struct cudaGraphicsResource **vbo_res, unsigned int vbo_res_flags); void deleteVBO(GLuint *vbo, struct cudaGraphicsResource *vbo_res); // rendering callbacks void display(); void keyboard(unsigned char key, int x, int y); void mouse(int button, int state, int x, int y); void motion(int x, int y); void timerEvent(int value); // Cuda functionality void runCuda(struct cudaGraphicsResource **vbo_resource); void runAutoTest(int devID, char **argv, char *ref_file); void checkResultCuda(int argc, char **argv, const GLuint &vbo); const char *sSDKsample = "simpleGL (VBO)"; /////////////////////////////////////////////////////////////////////////////// //! Simple kernel to modify vertex positions in sine wave pattern //! @param data data in global memory /////////////////////////////////////////////////////////////////////////////// __global__ void simple_vbo_kernel(float4 *pos, unsigned int width, unsigned int height, float time) { unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; // calculate uv coordinates float u = x / (float) width; float v = y / (float) height; u = u*2.0f - 1.0f; v = v*2.0f - 1.0f; // calculate simple sine wave pattern float freq = 4.0f; float w = sinf(u*freq + time) * cosf(v*freq + time) * 0.5f; // write output vertex pos[y*width+x] = make_float4(u, w, v, 1.0f); } void launch_kernel(float4 *pos, unsigned int mesh_width, unsigned int mesh_height, float time) { // execute the kernel dim3 block(8, 8, 1); dim3 grid(mesh_width / block.x, mesh_height / block.y, 1); simple_vbo_kernel<<< grid, block>>>(pos, mesh_width, mesh_height, time); } //////////////////////////////////////////////////////////////////////////////// // Program main //////////////////////////////////////////////////////////////////////////////// int main(int argc, char **argv) { char *ref_file = NULL; pArgc = &argc; pArgv = argv; #if defined(__linux__) setenv ("DISPLAY", ":0", 0); #endif printf("%s starting...\n", sSDKsample); if (argc > 1) { if (checkCmdLineFlag(argc, (const char **)argv, "file")) { // In this mode, we are running non-OpenGL and doing a compare of the VBO was generated correctly getCmdLineArgumentString(argc, (const char **)argv, "file", (char **)&ref_file); } } printf("\n"); runTest(argc, argv, ref_file); printf("%s completed, returned %s\n", sSDKsample, (g_TotalErrors == 0) ? "OK" : "ERROR!"); exit(g_TotalErrors == 0 ? EXIT_SUCCESS : EXIT_FAILURE); } void computeFPS() { frameCount++; fpsCount++; if (fpsCount == fpsLimit) { avgFPS = 1.f / (sdkGetAverageTimerValue(&timer) / 1000.f); fpsCount = 0; fpsLimit = (int)MAX(avgFPS, 1.f); sdkResetTimer(&timer); } char fps[256]; sprintf(fps, "Cuda GL Interop (VBO): %3.1f fps (Max 100Hz)", avgFPS); glutSetWindowTitle(fps); } //////////////////////////////////////////////////////////////////////////////// //! Initialize GL //////////////////////////////////////////////////////////////////////////////// bool initGL(int *argc, char **argv) { glutInit(argc, argv); glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE); glutInitWindowSize(window_width, window_height); glutCreateWindow("Cuda GL Interop (VBO)"); glutDisplayFunc(display); glutKeyboardFunc(keyboard); glutMotionFunc(motion); glutTimerFunc(REFRESH_DELAY, timerEvent,0); // initialize necessary OpenGL extensions if (! isGLVersionSupported(2,0)) { fprintf(stderr, "ERROR: Support for necessary OpenGL extensions missing."); fflush(stderr); return false; } // default initialization glClearColor(0.0, 0.0, 0.0, 1.0); glDisable(GL_DEPTH_TEST); // viewport glViewport(0, 0, window_width, window_height); // projection glMatrixMode(GL_PROJECTION); glLoadIdentity(); gluPerspective(60.0, (GLfloat)window_width / (GLfloat) window_height, 0.1, 10.0); SDK_CHECK_ERROR_GL(); return true; } //////////////////////////////////////////////////////////////////////////////// //! Run a simple test for CUDA //////////////////////////////////////////////////////////////////////////////// bool runTest(int argc, char **argv, char *ref_file) { // Create the CUTIL timer sdkCreateTimer(&timer); // use command-line specified CUDA device, otherwise use device with highest Gflops/s int devID = findCudaDevice(argc, (const char **)argv); // command line mode only if (ref_file != NULL) { // create VBO checkCudaErrors(cudaMalloc((void **)&d_vbo_buffer, mesh_width*mesh_height*4*sizeof(float))); // run the cuda part runAutoTest(devID, argv, ref_file); // check result of Cuda step checkResultCuda(argc, argv, vbo); cudaFree(d_vbo_buffer); d_vbo_buffer = NULL; } else { // First initialize OpenGL context, so we can properly set the GL for CUDA. // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop. if (false == initGL(&argc, argv)) { return false; } // register callbacks glutDisplayFunc(display); glutKeyboardFunc(keyboard); glutMouseFunc(mouse); glutMotionFunc(motion); #if defined (__APPLE__) || defined(MACOSX) atexit(cleanup); #else glutCloseFunc(cleanup); #endif // create VBO createVBO(&vbo, &cuda_vbo_resource, cudaGraphicsMapFlagsWriteDiscard); // run the cuda part runCuda(&cuda_vbo_resource); // start rendering mainloop glutMainLoop(); } return true; } //////////////////////////////////////////////////////////////////////////////// //! Run the Cuda part of the computation //////////////////////////////////////////////////////////////////////////////// void runCuda(struct cudaGraphicsResource **vbo_resource) { // map OpenGL buffer object for writing from CUDA float4 *dptr; checkCudaErrors(cudaGraphicsMapResources(1, vbo_resource, 0)); size_t num_bytes; checkCudaErrors(cudaGraphicsResourceGetMappedPointer((void **)&dptr, &num_bytes, *vbo_resource)); //printf("CUDA mapped VBO: May access %ld bytes\n", num_bytes); // execute the kernel // dim3 block(8, 8, 1); // dim3 grid(mesh_width / block.x, mesh_height / block.y, 1); // kernel<<< grid, block>>>(dptr, mesh_width, mesh_height, g_fAnim); launch_kernel(dptr, mesh_width, mesh_height, g_fAnim); // unmap buffer object checkCudaErrors(cudaGraphicsUnmapResources(1, vbo_resource, 0)); } #ifdef _WIN32 #ifndef FOPEN #define FOPEN(fHandle,filename,mode) fopen_s(&fHandle, filename, mode) #endif #else #ifndef FOPEN #define FOPEN(fHandle,filename,mode) (fHandle = fopen(filename, mode)) #endif #endif void sdkDumpBin2(void *data, unsigned int bytes, const char *filename) { printf("sdkDumpBin: <%s>\n", filename); FILE *fp; FOPEN(fp, filename, "wb"); fwrite(data, bytes, 1, fp); fflush(fp); fclose(fp); } //////////////////////////////////////////////////////////////////////////////// //! Run the Cuda part of the computation //////////////////////////////////////////////////////////////////////////////// void runAutoTest(int devID, char **argv, char *ref_file) { char *reference_file = NULL; void *imageData = malloc(mesh_width*mesh_height*sizeof(float)); // execute the kernel launch_kernel((float4 *)d_vbo_buffer, mesh_width, mesh_height, g_fAnim); cudaDeviceSynchronize(); getLastCudaError("launch_kernel failed"); checkCudaErrors(cudaMemcpy(imageData, d_vbo_buffer, mesh_width*mesh_height*sizeof(float), cudaMemcpyDeviceToHost)); sdkDumpBin2(imageData, mesh_width*mesh_height*sizeof(float), "simpleGL.bin"); reference_file = sdkFindFilePath(ref_file, argv[0]); if (reference_file && !sdkCompareBin2BinFloat("simpleGL.bin", reference_file, mesh_width*mesh_height*sizeof(float), MAX_EPSILON_ERROR, THRESHOLD, pArgv[0])) { g_TotalErrors++; } } //////////////////////////////////////////////////////////////////////////////// //! Create VBO //////////////////////////////////////////////////////////////////////////////// void createVBO(GLuint *vbo, struct cudaGraphicsResource **vbo_res, unsigned int vbo_res_flags) { assert(vbo); // create buffer object glGenBuffers(1, vbo); glBindBuffer(GL_ARRAY_BUFFER, *vbo); // initialize buffer object unsigned int size = mesh_width * mesh_height * 4 * sizeof(float); glBufferData(GL_ARRAY_BUFFER, size, 0, GL_DYNAMIC_DRAW); glBindBuffer(GL_ARRAY_BUFFER, 0); // register this buffer object with CUDA checkCudaErrors(cudaGraphicsGLRegisterBuffer(vbo_res, *vbo, vbo_res_flags)); SDK_CHECK_ERROR_GL(); } //////////////////////////////////////////////////////////////////////////////// //! Delete VBO //////////////////////////////////////////////////////////////////////////////// void deleteVBO(GLuint *vbo, struct cudaGraphicsResource *vbo_res) { // unregister this buffer object with CUDA checkCudaErrors(cudaGraphicsUnregisterResource(vbo_res)); glBindBuffer(1, *vbo); glDeleteBuffers(1, vbo); *vbo = 0; } //////////////////////////////////////////////////////////////////////////////// //! Display callback //////////////////////////////////////////////////////////////////////////////// void display() { sdkStartTimer(&timer); // run CUDA kernel to generate vertex positions runCuda(&cuda_vbo_resource); glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); // set view matrix glMatrixMode(GL_MODELVIEW); glLoadIdentity(); glTranslatef(0.0, 0.0, translate_z); glRotatef(rotate_x, 1.0, 0.0, 0.0); glRotatef(rotate_y, 0.0, 1.0, 0.0); // render from the vbo glBindBuffer(GL_ARRAY_BUFFER, vbo); glVertexPointer(4, GL_FLOAT, 0, 0); glEnableClientState(GL_VERTEX_ARRAY); glColor3f(1.0, 0.0, 0.0); glDrawArrays(GL_POINTS, 0, mesh_width * mesh_height); glDisableClientState(GL_VERTEX_ARRAY); glutSwapBuffers(); g_fAnim += 0.01f; sdkStopTimer(&timer); computeFPS(); } void timerEvent(int value) { if (glutGetWindow()) { glutPostRedisplay(); glutTimerFunc(REFRESH_DELAY, timerEvent,0); } } void cleanup() { sdkDeleteTimer(&timer); if (vbo) { deleteVBO(&vbo, cuda_vbo_resource); } } //////////////////////////////////////////////////////////////////////////////// //! Keyboard events handler //////////////////////////////////////////////////////////////////////////////// void keyboard(unsigned char key, int /*x*/, int /*y*/) { switch (key) { case (27) : #if defined(__APPLE__) || defined(MACOSX) exit(EXIT_SUCCESS); #else glutDestroyWindow(glutGetWindow()); return; #endif } } //////////////////////////////////////////////////////////////////////////////// //! Mouse event handlers //////////////////////////////////////////////////////////////////////////////// void mouse(int button, int state, int x, int y) { if (state == GLUT_DOWN) { mouse_buttons |= 1<("./data/regression.dat", data, mesh_width * mesh_height * 3, 0.0, false); } // unmap GL buffer object if (!glUnmapBuffer(GL_ARRAY_BUFFER)) { fprintf(stderr, "Unmap buffer failed.\n"); fflush(stderr); } checkCudaErrors(cudaGraphicsGLRegisterBuffer(&cuda_vbo_resource, vbo, cudaGraphicsMapFlagsWriteDiscard)); SDK_CHECK_ERROR_GL(); } }