/* * Copyright 1993-2015 NVIDIA Corporation. All rights reserved. * * Please refer to the NVIDIA end user license agreement (EULA) associated * with this source code for terms and conditions that govern your use of * this software. Any use, reproduction, disclosure, or distribution of * this software and related documentation outside the terms of the EULA * is strictly prohibited. * */ // Includes #include #include #include #include #include #include #include void error_exit(const char* format, ... ) { va_list args; va_start( args, format ); vfprintf( stderr, format, args ); va_end( args ); exit(1); } // GLES related includes and Xlib and EGL stuff #include "graphics_interface.h" // CUDA standard includes #include #include // CUDA FFT Libraries #include // CUDA helper functions #include #include #include #include "defines.h" #include "fluidsGLES_kernels.h" typedef float matrix4[4][4]; typedef float vector3[3]; #define MAX_EPSILON_ERROR 1.0f const char *sSDKname = "fluidsGLES"; // CUDA example code that implements the frequency space version of // Jos Stam's paper 'Stable Fluids' in 2D. This application uses the // CUDA FFT library (CUFFT) to perform velocity diffusion and to // force non-divergence in the velocity field at each time step. It uses // CUDA-OpenGLES interoperability to update the particle field directly // instead of doing a copy to system memory before drawing. Texture is // used for automatic bilinear interpolation at the velocity advection step. void cleanup(void); void reshape(int x, int y); // CUFFT plan handle cufftHandle planr2c; cufftHandle planc2r; static cData *vxfield = NULL; static cData *vyfield = NULL; cData *hvfield = NULL; cData *dvfield = NULL; static int wWidth = MAX(512, DIM); static int wHeight = MAX(512, DIM); static int clicked = 0; static int fpsCount = 0; static int fpsLimit = 1; StopWatchInterface *timer = NULL; int gui_mode; // For X window // Rotate & translate variable temp., will remove and use shaders. float rotate_x = 0.0, rotate_y = 0.0; float translate_z = -3.0; // Particle data GLuint vbo = 0,vao = 0; // OpenGLES vertex buffer object GLuint m_texture = 0; struct cudaGraphicsResource *cuda_vbo_resource; // handles OpenGLES-CUDA exchange static cData *particles = NULL; // particle positions in host memory static int lastx = 0, lasty = 0; // Texture pitch size_t tPitch = 0; // Now this is compatible with gcc in 64-bit char *ref_file = NULL; bool g_bQAAddTestForce = true; int g_iFrameToCompare = 100; int g_TotalErrors = 0; bool g_bExitESC = false; const unsigned int window_width = 512; const unsigned int window_height = 512; // CheckFBO/BackBuffer class objects CheckRender *g_CheckRender = NULL; void autoTest(char **); void displayFrame(); void keyboard(unsigned char key, int x, int y, int argc, char **argv); extern "C" void addForces(cData *v, int dx, int dy, int spx, int spy, float fx, float fy, int r); extern "C" void advectVelocity(cData *v, float *vx, float *vy, int dx, int pdx, int dy, float dt); extern "C" void diffuseProject(cData *vx, cData *vy, int dx, int dy, float dt, float visc); extern "C" void updateVelocity(cData *v, float *vx, float *vy, int dx, int pdx, int dy); extern "C" void advectParticles(GLuint vbo, cData *v, int dx, int dy, float dt); void simulateFluids(void) { // simulate fluid advectVelocity(dvfield, (float *)vxfield, (float *)vyfield, DIM, RPADW, DIM, DT); diffuseProject(vxfield, vyfield, CPADW, DIM, DT, VIS); updateVelocity(dvfield, (float *)vxfield, (float *)vyfield, DIM, RPADW, DIM); advectParticles(vbo, dvfield, DIM, DIM, DT); } GLuint mesh_shader = 0; void mat_identity(matrix4 m) { m[0][1] = m[0][2] = m[0][3] = m[1][0] = m[1][2] = m[1][3] = m[2][0] = m[2][1] = m[2][3] = m[3][0] = m[3][1] = m[3][2] = 0.0f; m[0][0] = m[1][1] = m[2][2] = m[3][3] = 1.0f; } void mat_multiply(matrix4 m0, matrix4 m1) { float m[4]; for(int r = 0; r < 4; r++) { m[0] = m[1] = m[2] = m[3] = 0.0f; for(int c = 0; c < 4; c++) { for(int i = 0; i < 4; i++) { m[c] += m0[i][r] * m1[c][i]; } } for(int c = 0; c < 4; c++) { m0[c][r] = m[c]; } } } void mat4f_Ortho(float left, float right, float bottom, float top, float near, float far, matrix4 m) { float r_l = right - left; float t_b = top - bottom; float f_n = far - near; float tx = - (right + left) / (right - left); float ty = - (top + bottom) / (top - bottom); float tz = - (far + near) / (far - near); matrix4 m2; m2[0][0] = 2.0f/ r_l; m2[0][1] = 0.0f; m2[0][2] = 0.0f; m2[0][3] = 0.0f; m2[1][0] = 0.0f; m2[1][1] = 2.0f / t_b; m2[1][2] = 0.0f; m2[1][3] = 0.0f; m2[2][0] = 0.0f; m2[2][1] = 0.0f; m2[2][2] = -2.0f / f_n; m2[2][3] = 0.0f; m2[3][0] = tx; m2[3][1] = ty; m2[3][2] = tz; m2[3][3] = 1.0f; mat_multiply(m, m2); } void readAndCompileShaderFromGLSLFile(GLuint new_shaderprogram, const char *filename, GLenum shaderType) { FILE *file = fopen(filename,"rb"); // open shader text file if (!file) error_exit("Filename %s does not exist\n", filename); /* get the size of the file and read it */ fseek(file,0,SEEK_END); GLint size = ftell(file); char *data = (char*)malloc(sizeof(char)*(size + 1)); memset(data, 0, sizeof(char)*(size + 1)); fseek(file,0,SEEK_SET); size_t res = fread(data,1,size,file); fclose(file); GLuint shader = glCreateShader(shaderType); glShaderSource(shader, 1, (const GLchar**)&data, &size); glCompileShader(shader); GET_GLERROR(0); GLint compile_success = 0; glGetShaderiv(shader, GL_COMPILE_STATUS, &compile_success); GET_GLERROR(0); if (compile_success == GL_FALSE) { printf("Compilation of %s failed!\n Reason:\n", filename); GLint maxLength = 0; glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &maxLength); char errorLog[maxLength]; glGetShaderInfoLog(shader, maxLength, &maxLength, &errorLog[0]); printf("%s", errorLog); glDeleteShader(shader); exit(1); } glAttachShader(new_shaderprogram, shader); free(data); } GLuint ShaderCreate(const char *vshader_filename, const char *fshader_filename) { printf("Loading GLSL shaders %s %s\n", vshader_filename, fshader_filename); GLuint new_shaderprogram = glCreateProgram(); GET_GLERROR(0); if (vshader_filename) readAndCompileShaderFromGLSLFile(new_shaderprogram, vshader_filename, GL_VERTEX_SHADER); GET_GLERROR(0); if (fshader_filename) readAndCompileShaderFromGLSLFile(new_shaderprogram, fshader_filename, GL_FRAGMENT_SHADER); GET_GLERROR(0); glLinkProgram(new_shaderprogram); GET_GLERROR(0); GLint link_success; glGetProgramiv(new_shaderprogram, GL_LINK_STATUS, &link_success); if (link_success == GL_FALSE) { printf("Linking of %s with %s failed!\n Reason:\n", vshader_filename, fshader_filename); GLint maxLength = 0; glGetShaderiv(new_shaderprogram, GL_INFO_LOG_LENGTH, &maxLength); char errorLog[maxLength]; glGetShaderInfoLog(new_shaderprogram, maxLength, &maxLength, &errorLog[0]); printf("%s", errorLog); exit(EXIT_FAILURE); } return new_shaderprogram; } void motion(int x, int y) { // Convert motion coordinates to domain float fx = (lastx / (float)wWidth); float fy = (lasty / (float)wHeight); int nx = (int)(fx * DIM); int ny = (int)(fy * DIM); if (clicked && nx < DIM-FR && nx > FR-1 && ny < DIM-FR && ny > FR-1) { int ddx = x - lastx; int ddy = y - lasty; fx = ddx / (float)wWidth; fy = ddy / (float)wHeight; int spy = ny-FR; int spx = nx-FR; addForces(dvfield, DIM, DIM, spx, spy, FORCE * DT * fx, FORCE * DT * fy, FR); lastx = x; lasty = y; } } //=========================================================================== // InitGraphicsState() - initialize OpenGLES //=========================================================================== static void InitGraphicsState(int argc, char** argv) { char *GL_version = (char *)glGetString(GL_VERSION); char *GL_vendor = (char *)glGetString(GL_VENDOR); char *GL_renderer = (char *)glGetString(GL_RENDERER); printf("Version: %s\n", GL_version); printf("Vendor: %s\n", GL_vendor); printf("Renderer: %s\n", GL_renderer); // Allocate and initialize host data GLint bsize; // initialize buffer object glGenBuffers(1, &vbo); glBindBuffer(GL_ARRAY_BUFFER, vbo); glBufferData(GL_ARRAY_BUFFER, sizeof(cData) * DS, particles, GL_DYNAMIC_DRAW); glGetBufferParameteriv(GL_ARRAY_BUFFER, GL_BUFFER_SIZE, &bsize); if (bsize != (sizeof(cData) * DS)) { printf("Failed to initialize GL extensions.\n"); exit(EXIT_FAILURE); } checkCudaErrors(cudaGraphicsGLRegisterBuffer(&cuda_vbo_resource, vbo, cudaGraphicsMapFlagsNone)); // GLSL stuff char *vertex_shader_path = sdkFindFilePath("mesh.vert.glsl", argv[0]); char *fragment_shader_path = sdkFindFilePath("mesh.frag.glsl", argv[0]); if (vertex_shader_path == NULL || fragment_shader_path == NULL) { printf("Error finding shader file\n"); exit(EXIT_FAILURE); } mesh_shader = ShaderCreate(vertex_shader_path, fragment_shader_path); GET_GLERROR(0); free(vertex_shader_path); free(fragment_shader_path); glUseProgram(mesh_shader); } void displayFrame(void) { if (!ref_file) { sdkStartTimer(&timer); simulateFluids(); } GLint view_arr[4]; glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); glEnable(GL_BLEND); glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); glDepthMask(GL_FALSE); glUseProgram(mesh_shader); // Set modelview and projection matricies GLint h_ModelViewMatrix = glGetUniformLocation(mesh_shader, "modelview"); GLint h_ProjectionMatrix = glGetUniformLocation(mesh_shader, "projection"); matrix4 modelview; matrix4 projection; mat_identity(modelview); mat_identity(projection); // (float left, float right, float bottom, float top, float near, float far, matrix4 m) mat4f_Ortho(0.0, 1.0, 1.0, 0.0, 0.0, 1.0, projection); glUniformMatrix4fv(h_ModelViewMatrix, 1, GL_FALSE, (GLfloat*)modelview); glUniformMatrix4fv(h_ProjectionMatrix, 1, GL_FALSE, (GLfloat*)projection); // Set position coords GLint h_position = glGetAttribLocation(mesh_shader, "a_position"); glEnableVertexAttribArray(h_position); glVertexAttribPointer(h_position, 2, GL_FLOAT, GL_FALSE, 0, 0); glBindBuffer(GL_ARRAY_BUFFER, vbo); glDrawArrays(GL_POINTS, 0, DS*sizeof(cData)); glDisableVertexAttribArray(h_position); glDisable(GL_DEPTH_TEST); glDisable(GL_CULL_FACE); glDisable(GL_BLEND); glDepthMask(GL_TRUE); if (ref_file) { return; } glUseProgram(0); // Finish timing before swap buffers to avoid refresh sync sdkStopTimer(&timer); graphics_swap_buffers(); fpsCount++; if (fpsCount == fpsLimit) { char fps[256]; float ifps = 1.f / (sdkGetAverageTimerValue(&timer) / 1000.f); sprintf(fps, "Cuda/GL Stable Fluids (%d x %d): %3.1f fps", DIM, DIM, ifps); graphics_set_windowtitle(fps); fpsCount = 0; fpsLimit = (int)MAX(ifps, 1.f); sdkResetTimer(&timer); } } void autoTest(char **argv) { CFrameBufferObject *fbo = new CFrameBufferObject(wWidth, wHeight, 4, false, GL_TEXTURE_2D); g_CheckRender = new CheckFBO(wWidth, wHeight, 4, fbo); g_CheckRender->setPixelFormat(GL_RGBA); g_CheckRender->setExecPath(argv[0]); g_CheckRender->EnableQAReadback(true); fbo->bindRenderPath(); for (int count=0; countunbindRenderPath(); // compare to offical reference image, printing PASS or FAIL. printf("> (Frame %d) Readback BackBuffer\n", 100); g_CheckRender->readback(wWidth, wHeight); g_CheckRender->savePPM("fluidsGLES.ppm", true, NULL); if (!g_CheckRender->PPMvsPPM("fluidsGLES.ppm", ref_file, MAX_EPSILON_ERROR, 0.25f)) { g_TotalErrors++; } } // Run fluids Simulation bool runFluidsSimulation(int argc, char **argv, char *ref_file) { // Create the CUTIL timer sdkCreateTimer(&timer); if (ref_file != NULL) { // command line mode only - auto test graphics_setup_window(0,0, wWidth, wHeight, sSDKname); InitGraphicsState(argc, argv); // set up GLES stuff autoTest(argv); cleanup(); } else { // create X11 window and set up associated OpenGL ES context graphics_setup_window(0,0, wWidth, wHeight, sSDKname); InitGraphicsState(argc, argv); // set up GLES stuff glClear(GL_COLOR_BUFFER_BIT); graphics_swap_buffers(); XEvent event; KeySym key; char text[255]; while (1) { while (XPending(display) > 0) { XNextEvent(display, &event); if (event.type==Expose && event.xexpose.count==0) { printf("Redraw requested!\n"); } if (event.type==KeyPress && XLookupString(&event.xkey,text,255,&key,0)==1) { if (text[0] == 27 || text[0] == 'q' || text[0] == 'Q') { keyboard(text[0], 0, 0, argc, argv); return true; } if (text[0] == 114) { keyboard(text[0], 0, 0, argc, argv); } printf("You pressed the %c key!\n",text[0]); } if (event.type==ButtonPress) { lastx = event.xbutton.x; lasty = event.xbutton.y; clicked = !clicked; } if (event.type==ButtonRelease) { lastx = event.xbutton.x; lasty = event.xbutton.y; clicked = !clicked; } if (event.type == MotionNotify) { motion(event.xmotion.x, event.xmotion.y); } else { XFlush(display); } } displayFrame(); usleep(1000); // need not take full CPU and GPU } } return true; } // very simple von neumann middle-square prng. can't use rand() in -qatest // mode because its implementation varies across platforms which makes testing // for consistency in the important parts of this program difficult. float myrand(void) { static int seed = 72191; char sq[22]; if (ref_file) { seed *= seed; sprintf(sq, "%010d", seed); // pull the middle 5 digits out of sq sq[8] = 0; seed = atoi(&sq[3]); return seed/99999.f; } else { return rand()/(float)RAND_MAX; } } void initParticles(cData *p, int dx, int dy) { int i, j; for (i = 0; i < dy; i++) { for (j = 0; j < dx; j++) { p[i*dx+j].x = (j+0.5f+(myrand() - 0.5f))/dx; p[i*dx+j].y = (i+0.5f+(myrand() - 0.5f))/dy; } } } void keyboard(unsigned char key, int x, int y, int argc, char **argv) { switch (key) { case 'q': case 'Q': case 27: g_bExitESC = true; cleanup(); graphics_close_window(); // close window and destroy OpenGL ES context return; break; case 'r': printf("\nResetting\n"); memset(hvfield, 0, sizeof(cData) * DS); cudaMemcpy(dvfield, hvfield, sizeof(cData) * DS, cudaMemcpyHostToDevice); initParticles(particles, DIM, DIM); checkCudaErrors(cudaGraphicsUnregisterResource(cuda_vbo_resource)); getLastCudaError("cudaGraphicsUnregisterBuffer failed"); glBindBuffer(GL_ARRAY_BUFFER, 0); glDeleteBuffers(1, &vbo); InitGraphicsState(argc, argv); // set up GLES stuff graphics_swap_buffers(); getLastCudaError("cudaGraphicsGLRegisterBuffer failed"); break; default: break; } } void cleanup(void) { checkCudaErrors(cudaGraphicsUnregisterResource(cuda_vbo_resource)); deleteTexture(); // Free all host and device resources free(hvfield); free(particles); checkCudaErrors(cudaFree(dvfield)); checkCudaErrors(cudaFree(vxfield)); checkCudaErrors(cudaFree(vyfield)); checkCudaErrors(cufftDestroy(planr2c)); checkCudaErrors(cufftDestroy(planc2r)); glBindBuffer(GL_ARRAY_BUFFER, 0); glDeleteBuffers(1, &vbo); sdkDeleteTimer(&timer); } int main(int argc, char **argv) { int devID; cudaDeviceProp deviceProps; #if defined(__linux__) setenv ("DISPLAY", ":0", 0); #endif printf("%s Starting...\n\n", sSDKname); printf("NOTE: The CUDA Samples are not meant for performance measurements. Results may vary when GPU Boost is enabled.\n\n"); #if defined (__aarch64__) || defined(__arm__) // find iGPU on the system which is compute capable which will perform GLES-CUDA interop devID = findIntegratedGPU(); #else // use command-line specified CUDA device, otherwise use device with highest Gflops/s devID = findCudaDevice(argc, (const char **)argv); #endif // get number of SMs on this GPU checkCudaErrors(cudaGetDeviceProperties(&deviceProps, devID)); printf("CUDA device [%s] has %d Multi-Processors\n", deviceProps.name, deviceProps.multiProcessorCount); // automated build testing harness if (checkCmdLineFlag(argc, (const char **)argv, "file")) { getCmdLineArgumentString(argc, (const char **)argv, "file", &ref_file); } sdkCreateTimer(&timer); sdkResetTimer(&timer); hvfield = (cData *)malloc(sizeof(cData) * DS); memset(hvfield, 0, sizeof(cData) * DS); // Allocate and initialize device data checkCudaErrors(cudaMallocPitch((void **)&dvfield, &tPitch, sizeof(cData)*DIM, DIM)); checkCudaErrors(cudaMemcpy(dvfield, hvfield, sizeof(cData) * DS, cudaMemcpyHostToDevice)); // Temporary complex velocity field data checkCudaErrors(cudaMalloc((void **)&vxfield, sizeof(cData) * PDS)); checkCudaErrors(cudaMalloc((void **)&vyfield, sizeof(cData) * PDS)); setupTexture(DIM, DIM); // Create particle array particles = (cData *)malloc(sizeof(cData) * DS); memset(particles, 0, sizeof(cData) * DS); initParticles(particles, DIM, DIM); // Create CUFFT transform plan configuration checkCudaErrors(cufftPlan2d(&planr2c, DIM, DIM, CUFFT_R2C)); checkCudaErrors(cufftPlan2d(&planc2r, DIM, DIM, CUFFT_C2R)); runFluidsSimulation(argc, argv, ref_file); if (ref_file) { printf("[fluidsGLES] - Test Results: %d Failures\n", g_TotalErrors); exit(g_TotalErrors == 0 ? EXIT_SUCCESS : EXIT_FAILURE); } sdkDeleteTimer(&timer); if (!ref_file) { exit(EXIT_SUCCESS); } return 0; }