cuda-samples/Samples/5_Domain_Specific/simpleGLES_screen/simpleGLES_screen.cu
2022-01-13 11:35:24 +05:30

602 lines
18 KiB
Plaintext

/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
This example demonstrates how to use the CUDA C bindings to OpenGL ES to
dynamically modify a vertex buffer using a CUDA C kernel.
The steps are:
1. Create an empty vertex buffer object (VBO)
2. Register the VBO with CUDA C
3. Map the VBO for writing from CUDA C
4. Run CUDA C kernel to modify the vertex positions
5. Unmap the VBO
6. Render the results using OpenGL ES
Host code
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <stdarg.h>
#include <unistd.h>
#include <screen/screen.h>
#include "graphics_interface.c"
// includes, cuda
#include <cuda_runtime.h>
#include <cuda_gl_interop.h>
// Utilities and timing functions
#include <helper_functions.h> // includes cuda.h and cuda_runtime_api.h
// CUDA helper functions
#include <helper_cuda.h> // helper functions for CUDA error check
#include <vector_types.h>
#define MAX_EPSILON_ERROR 0.0f
#define THRESHOLD 0.0f
#define REFRESH_DELAY 1 // ms
#define GUI_IDLE 0x100
#define GUI_ROTATE 0x101
#define GUI_TRANSLATE 0x102
int gui_mode;
////////////////////////////////////////////////////////////////////////////////
// Default configuration
unsigned int window_width = 512;
unsigned int window_height = 512;
unsigned int dispno = 0;
// constants
const unsigned int mesh_width = 256;
const unsigned int mesh_height = 256;
// OpenGL ES variables and interop with CUDA C
GLuint mesh_vao, mesh_vbo;
struct cudaGraphicsResource *cuda_vbo_resource;
void *d_vbo_buffer = NULL;
float g_fAnim = 0.0;
// UI / mouse controls
int mouse_old_x, mouse_old_y;
int mouse_buttons = 0;
float rotate_x = 0.0, rotate_y = 0.0;
float translate_z = -3.0;
StopWatchInterface *timer = NULL;
// Frame statistics
int frame;
int fpsCount = 0; // FPS count for averaging
int fpsLimit = 1; // FPS limit for sampling
int g_Index = 0;
float avgFPS = 0.0f;
unsigned int frameCount = 0;
unsigned int g_TotalErrors = 0;
// The default number of seconds after which the test will end.
#define TIME_LIMIT 10.0 // 10 secs
// Flag indicating it is time to shut down
static GLboolean shutdown = GL_FALSE;
// Callback to close window
static void closeCB_app(void) { shutdown = GL_TRUE; }
// Callback to handle key presses
static void keyCB_app(char key, int state) {
// Ignoring releases
if (!state) return;
if ((key == 'q') || (key == 'Q') || (key == NvGlDemoKeyCode_Escape))
shutdown = GL_TRUE;
}
// Auto-Verification Code
bool g_bQAReadback = false;
int *pArgc = NULL;
char **pArgv = NULL;
#define MAX(a, b) ((a > b) ? a : b)
////////////////////////////////////////////////////////////////////////////////
// declaration, forward
// CUDA functionality
void runCuda(struct cudaGraphicsResource **vbo_resource);
void runAutoTest(int devID, char **argv, char *ref_file);
void checkResultCuda(int argc, char **argv, const GLuint &vbo);
const char *sSDKsample = "simpleGLES on Screen (VBO)";
void computeFPS() {
frameCount++;
fpsCount++;
if (fpsCount == fpsLimit) {
avgFPS = 1.f / (sdkGetAverageTimerValue(&timer) / 1000.f);
fpsCount = 0;
fpsLimit = (int)MAX(avgFPS, 1.f);
sdkResetTimer(&timer);
}
char fps[256];
sprintf(fps, "Cuda/OpenGL ES Interop (VBO): %3.1f fps (Max 1000 fps)",
avgFPS);
graphics_set_windowtitle(fps);
}
///////////////////////////////////////////////////////////////////////////////
//! Simple kernel to modify vertex positions in sine wave pattern
//! @param data data in global memory
///////////////////////////////////////////////////////////////////////////////
__global__ void simple_vbo_kernel(float4 *pos, unsigned int width,
unsigned int height, float time) {
unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
// calculate uv coordinates
float u = x / (float)width;
float v = y / (float)height;
u = u * 2.0f - 1.0f;
v = v * 2.0f - 1.0f;
// calculate simple sine wave pattern
float freq = 4.0f;
float w = sinf(u * freq + time) * cosf(v * freq + time) * 0.5f;
// write output vertex
pos[y * width + x] = make_float4(u, w, v, 1.0f);
}
void launch_kernel(float4 *pos, unsigned int mesh_width,
unsigned int mesh_height, float time) {
// execute the kernel
dim3 block(8, 8, 1);
dim3 grid(mesh_width / block.x, mesh_height / block.y, 1);
simple_vbo_kernel<<<grid, block>>>(pos, mesh_width, mesh_height, time);
}
////////////////////////////////////////////////////////////////////////////////
//! Run the Cuda part of the computation
////////////////////////////////////////////////////////////////////////////////
void runCuda(struct cudaGraphicsResource **vbo_resource) {
// map OpenGL buffer object for writing from CUDA
float4 *dptr;
cudaGraphicsMapResources(1, vbo_resource, 0);
size_t num_bytes;
cudaGraphicsResourceGetMappedPointer((void **)&dptr, &num_bytes,
*vbo_resource);
launch_kernel(dptr, mesh_width, mesh_height, g_fAnim);
// unmap buffer object
cudaGraphicsUnmapResources(1, vbo_resource, 0);
}
#ifndef FOPEN
#define FOPEN(fHandle, filename, mode) (fHandle = fopen(filename, mode))
#endif
void sdkDumpBin2(void *data, unsigned int bytes, const char *filename) {
printf("sdkDumpBin: <%s>\n", filename);
FILE *fp;
FOPEN(fp, filename, "wb");
fwrite(data, bytes, 1, fp);
fflush(fp);
fclose(fp);
}
////////////////////////////////////////////////////////////////////////////////
//! Run the Cuda part of the computation
////////////////////////////////////////////////////////////////////////////////
void runAutoTest(int devID, char **argv, char *ref_file) {
char *reference_file = NULL;
void *imageData = malloc(mesh_width * mesh_height * sizeof(float));
// execute the kernel
launch_kernel((float4 *)d_vbo_buffer, mesh_width, mesh_height, g_fAnim);
cudaDeviceSynchronize();
getLastCudaError("launch_kernel failed");
cudaMemcpy(imageData, d_vbo_buffer, mesh_width * mesh_height * sizeof(float),
cudaMemcpyDeviceToHost);
sdkDumpBin2(imageData, mesh_width * mesh_height * sizeof(float),
"simpleGLES_screen.bin");
reference_file = sdkFindFilePath(ref_file, argv[0]);
if (reference_file &&
!sdkCompareBin2BinFloat("simpleGLES_screen.bin", reference_file,
mesh_width * mesh_height * sizeof(float),
MAX_EPSILON_ERROR, THRESHOLD, pArgv[0])) {
g_TotalErrors++;
}
}
////////////////////////////////////////////////////////////////////////////////
//! Display callback
////////////////////////////////////////////////////////////////////////////////
void display_thisframe(float time_delta) {
sdkStartTimer(&timer);
// run CUDA kernel to generate vertex positions
runCuda(&cuda_vbo_resource);
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glDrawArrays(GL_POINTS, 0, mesh_width * mesh_height);
glFinish();
g_fAnim += time_delta;
sdkStopTimer(&timer);
computeFPS();
}
////////////////////////////////////////////////////////////////////////////////
//! Check if the result is correct or write data to file for external
//! regression testing
////////////////////////////////////////////////////////////////////////////////
void checkResultCuda(int argc, char **argv, const GLuint &vbo) {
if (!d_vbo_buffer) {
printf("%s: Mapping result buffer from OpenGL ES\n", __FUNCTION__);
cudaGraphicsUnregisterResource(cuda_vbo_resource);
// map buffer object
glBindBuffer(GL_ARRAY_BUFFER, vbo);
float *data = (float *)glMapBufferRange(
GL_ARRAY_BUFFER, 0, mesh_width * mesh_height * 4 * sizeof(float),
GL_READ_ONLY);
// check result
if (checkCmdLineFlag(argc, (const char **)argv, "regression")) {
// write file for regression test
sdkWriteFile<float>("./data/regression.dat", data,
mesh_width * mesh_height * 3, 0.0, false);
}
// unmap GL buffer object
if (!glUnmapBuffer(GL_ARRAY_BUFFER)) {
fprintf(stderr, "Unmap buffer failed.\n");
fflush(stderr);
}
checkCudaErrors(cudaGraphicsGLRegisterBuffer(
&cuda_vbo_resource, vbo, cudaGraphicsMapFlagsWriteDiscard));
CHECK_GLERROR();
}
}
GLuint mesh_shader = 0;
void readAndCompileShaderFromGLSLFile(GLuint new_shaderprogram,
const char *filename, GLenum shaderType) {
FILE *file = fopen(filename, "rb"); // open shader text file
if (!file) {
error_exit("Filename %s does not exist\n", filename);
}
// get the size of the file and read it
fseek(file, 0, SEEK_END);
GLint size = ftell(file);
char *data = (char *)malloc(sizeof(char) * (size + 1));
memset(data, 0, sizeof(char) * (size + 1));
fseek(file, 0, SEEK_SET);
size_t res = fread(data, 1, size, file);
fclose(file);
GLuint shader = glCreateShader(shaderType);
glShaderSource(shader, 1, (const GLchar **)&data, &size);
glCompileShader(shader);
CHECK_GLERROR();
GLint compile_success = 0;
glGetShaderiv(shader, GL_COMPILE_STATUS, &compile_success);
CHECK_GLERROR();
if (compile_success == GL_FALSE) {
printf("Compilation of %s failed!\n Reason:\n", filename);
GLint maxLength = 0;
glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &maxLength);
char errorLog[maxLength];
glGetShaderInfoLog(shader, maxLength, &maxLength, &errorLog[0]);
printf("%s", errorLog);
glDeleteShader(shader);
exit(1);
}
glAttachShader(new_shaderprogram, shader);
glDeleteShader(shader);
free(data);
}
GLuint ShaderCreate(const char *vshader_filename,
const char *fshader_filename) {
printf("Loading GLSL shaders %s %s\n", vshader_filename, fshader_filename);
GLuint new_shaderprogram = glCreateProgram();
CHECK_GLERROR();
if (vshader_filename) {
readAndCompileShaderFromGLSLFile(new_shaderprogram, vshader_filename,
GL_VERTEX_SHADER);
}
CHECK_GLERROR();
if (fshader_filename) {
readAndCompileShaderFromGLSLFile(new_shaderprogram, fshader_filename,
GL_FRAGMENT_SHADER);
}
CHECK_GLERROR();
glLinkProgram(new_shaderprogram);
CHECK_GLERROR();
GLint link_success;
glGetProgramiv(new_shaderprogram, GL_LINK_STATUS, &link_success);
if (link_success == GL_FALSE) {
printf("Linking of %s with %s failed!\n Reason:\n", vshader_filename,
fshader_filename);
GLint maxLength = 0;
glGetShaderiv(new_shaderprogram, GL_INFO_LOG_LENGTH, &maxLength);
char errorLog[maxLength];
glGetShaderInfoLog(new_shaderprogram, maxLength, &maxLength, &errorLog[0]);
printf("%s", errorLog);
exit(EXIT_FAILURE);
}
return new_shaderprogram;
}
//===========================================================================
// InitGraphicsState() - initialize OpenGL
//===========================================================================
static void InitGraphicsState(void) {
char *GL_version = (char *)glGetString(GL_VERSION);
char *GL_vendor = (char *)glGetString(GL_VENDOR);
char *GL_renderer = (char *)glGetString(GL_RENDERER);
printf("Version: %s\n", GL_version);
printf("Vendor: %s\n", GL_vendor);
printf("Renderer: %s\n", GL_renderer);
// RENDERING SETUP (OpenGL ES or OpenGL Core Profile!)
glGenVertexArrays(1, &mesh_vao); // Features' Vertex Array Object allocation
glBindVertexArray(mesh_vao); // bind VAO
// initialize buffer object
glGenBuffers(1, &mesh_vbo);
glBindBuffer(GL_ARRAY_BUFFER, mesh_vbo);
unsigned int size = mesh_width * mesh_height * 4 * sizeof(float);
glBufferData(GL_ARRAY_BUFFER, size, NULL, GL_DYNAMIC_DRAW);
glVertexAttribPointer((GLuint)0, 4, GL_FLOAT, GL_FALSE, 0, 0);
glEnableVertexAttribArray(0);
checkCudaErrors(cudaGraphicsGLRegisterBuffer(&cuda_vbo_resource, mesh_vbo,
cudaGraphicsMapFlagsNone));
// GLSL stuff
char *vertex_shader_path = sdkFindFilePath("mesh.vert.glsl", pArgv[0]);
char *fragment_shader_path = sdkFindFilePath("mesh.frag.glsl", pArgv[0]);
if (vertex_shader_path == NULL || fragment_shader_path == NULL) {
printf("Error finding shader file\n");
exit(EXIT_FAILURE);
}
mesh_shader = ShaderCreate(vertex_shader_path, fragment_shader_path);
CHECK_GLERROR();
free(vertex_shader_path);
free(fragment_shader_path);
glUseProgram(mesh_shader);
}
////////////////////////////////////////////////////////////////////////////////
//! Run a simple test for CUDA
////////////////////////////////////////////////////////////////////////////////
bool runTest(int argc, char **argv, char *ref_file) {
// command line mode only
if (ref_file != NULL) {
// This will pick the best possible CUDA capable device
// int devID = findCudaDevice(argc, (const char **)argv);
#if defined(__aarch64__) || defined(__arm__)
// find iGPU on the system which is compute capable which will perform
// GLES-CUDA interop
int devID = findIntegratedGPU();
#else
// use command-line specified CUDA device, otherwise use device with highest
// Gflops/s
int devID = findCudaDevice(argc, (const char **)argv);
#endif
// create VBO
checkCudaErrors(cudaMalloc((void **)&d_vbo_buffer,
mesh_width * mesh_height * 4 * sizeof(float)));
// run the cuda part
runAutoTest(devID, argv, ref_file);
// check result of Cuda step
checkResultCuda(argc, argv, mesh_vbo);
cudaFree(d_vbo_buffer);
d_vbo_buffer = NULL;
} else {
double endTime = TIME_LIMIT;
// this would use command-line specified CUDA device, note that CUDA
// defaults to highest Gflops/s device
if (checkCmdLineFlag(argc, (const char **)argv, "device")) {
error_exit("Device setting not yet implemented!\n");
}
// display selection
if (checkCmdLineFlag(argc, (const char **)argv, "dispno")) {
dispno = getCmdLineArgumentInt(argc, (const char **)argv, "dispno");
}
// Window width
if (checkCmdLineFlag(argc, (const char **)argv, "width")) {
window_width = getCmdLineArgumentInt(argc, (const char **)argv, "width");
}
// Window Height
if (checkCmdLineFlag(argc, (const char **)argv, "height")) {
window_height =
getCmdLineArgumentInt(argc, (const char **)argv, "height");
}
// Determine how long to run for in secs: default is 10s
if (checkCmdLineFlag(argc, (const char **)argv, "runtime")) {
endTime = getCmdLineArgumentInt(argc, (const char **)argv, "runtime");
}
SetCloseCB(closeCB_app);
SetKeyCB(keyCB_app);
// create QNX screen window and set up associated OpenGL ES context
graphics_setup_window(0, 0, window_width, window_height, sSDKsample,
dispno);
#if defined(__aarch64__) || defined(__arm__)
// find iGPU on the system which is compute capable which will perform
// GLES-CUDA interop
int devID = findIntegratedGPU();
#else
// use command-line specified CUDA device, otherwise use device with highest
// Gflops/s
int devID = findCudaDevice(argc, (const char **)argv);
#endif
InitGraphicsState(); // set up GLES stuff
glClearColor(0, 0.5, 1, 1); // blue-ish background
glClear(GL_COLOR_BUFFER_BIT);
graphics_swap_buffers();
int frame = 0;
struct timeval begin, end;
gettimeofday(&begin, NULL);
// Print runtime
if (endTime < 0.0) {
endTime = TIME_LIMIT;
printf(" running forever...\n");
} else {
printf(" running for %f seconds...\n", endTime);
}
while (!shutdown) {
frame++;
display_thisframe(0.010);
usleep(1000);
graphics_swap_buffers();
CheckEvents();
gettimeofday(&end, 0);
double elapsed = (end.tv_sec - begin.tv_sec) +
((end.tv_usec - begin.tv_usec) / 1000000.0);
// Check whether time limit has been exceeded
if (!shutdown) shutdown = (endTime <= elapsed);
}
// NOTE: Before destroying OpenGL ES context, must unregister all shared
// resources from CUDA !
checkCudaErrors(cudaGraphicsUnregisterResource(cuda_vbo_resource));
graphics_close_window(); // close window and destroy OpenGL ES context
}
return true;
}
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int main(int argc, char **argv) {
char *ref_file = NULL;
pArgc = &argc;
pArgv = argv;
#if defined(__linux__)
setenv("DISPLAY", ":0", 0);
#endif
printf("%s starting...\n", sSDKsample);
if (argc > 1) {
if (checkCmdLineFlag(argc, (const char **)argv, "file")) {
// In this mode, we run without OpenGL and see if VBO is generated
// correctly
getCmdLineArgumentString(argc, (const char **)argv, "file",
(char **)&ref_file);
}
}
printf("\n");
runTest(argc, argv, ref_file);
printf("%s completed, returned %s\n", sSDKsample,
(g_TotalErrors == 0) ? "OK" : "ERROR!");
exit(g_TotalErrors == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}