mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2024-12-01 13:09:15 +08:00
498 lines
14 KiB
C++
498 lines
14 KiB
C++
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* * Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* * Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
|
* contributors may be used to endorse or promote products derived
|
|
* from this software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
|
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
|
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
// OpenGL Graphics includes
|
|
#include <helper_gl.h>
|
|
|
|
#if defined(__APPLE__) || defined(MACOSX)
|
|
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
|
|
#include <GLUT/glut.h>
|
|
#ifndef glutCloseFunc
|
|
#define glutCloseFunc glutWMCloseFunc
|
|
#endif
|
|
#else
|
|
#include <GL/freeglut.h>
|
|
#endif
|
|
|
|
// Includes
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
|
|
// CUDA standard includes
|
|
#include <cuda_runtime.h>
|
|
#include <cuda_gl_interop.h>
|
|
|
|
// CUDA FFT Libraries
|
|
#include <cufft.h>
|
|
|
|
// CUDA helper functions
|
|
#include <helper_functions.h>
|
|
#include <rendercheck_gl.h>
|
|
#include <helper_cuda.h>
|
|
|
|
#include "defines.h"
|
|
#include "fluidsGL_kernels.h"
|
|
|
|
// fluidsGL
//
// CUDA sample implementing the frequency-space formulation of Jos Stam's
// 'Stable Fluids' paper in 2D:
//   * the CUDA FFT library (CUFFT) performs velocity diffusion and enforces
//     a non-divergent velocity field at every time step;
//   * CUDA-OpenGL interoperability lets the particle field be updated in
//     place, without a round trip through system memory before drawing;
//   * a texture supplies automatic bilinear interpolation during the
//     velocity advection step.

// Epsilon handed to the PPM-vs-PPM comparison in the automated test.
#define MAX_EPSILON_ERROR 1.0f

// Sample name printed in the start-up banner.
const char *sSDKname = "fluidsGL";
|
|
|
|
void cleanup(void);
|
|
void reshape(int x, int y);
|
|
|
|
// CUFFT plan handle
|
|
cufftHandle planr2c;
|
|
cufftHandle planc2r;
|
|
static cData *vxfield = NULL;
|
|
static cData *vyfield = NULL;
|
|
|
|
cData *hvfield = NULL;
|
|
cData *dvfield = NULL;
|
|
static int wWidth = MAX(512, DIM);
|
|
static int wHeight = MAX(512, DIM);
|
|
|
|
static int clicked = 0;
|
|
static int fpsCount = 0;
|
|
static int fpsLimit = 1;
|
|
StopWatchInterface *timer = NULL;
|
|
|
|
// Particle data
|
|
GLuint vbo = 0; // OpenGL vertex buffer object
|
|
struct cudaGraphicsResource *cuda_vbo_resource; // handles OpenGL-CUDA exchange
|
|
static cData *particles = NULL; // particle positions in host memory
|
|
static int lastx = 0, lasty = 0;
|
|
|
|
// Texture pitch
|
|
size_t tPitch = 0; // Now this is compatible with gcc in 64-bit
|
|
|
|
char *ref_file = NULL;
|
|
bool g_bQAAddTestForce = true;
|
|
int g_iFrameToCompare = 100;
|
|
int g_TotalErrors = 0;
|
|
|
|
bool g_bExitESC = false;
|
|
|
|
// CheckFBO/BackBuffer class objects
|
|
CheckRender *g_CheckRender = NULL;
|
|
|
|
void autoTest(char **);
|
|
|
|
extern "C" void addForces(cData *v, int dx, int dy, int spx, int spy, float fx,
|
|
float fy, int r);
|
|
extern "C" void advectVelocity(cData *v, float *vx, float *vy, int dx, int pdx,
|
|
int dy, float dt);
|
|
extern "C" void diffuseProject(cData *vx, cData *vy, int dx, int dy, float dt,
|
|
float visc);
|
|
extern "C" void updateVelocity(cData *v, float *vx, float *vy, int dx, int pdx,
|
|
int dy);
|
|
extern "C" void advectParticles(GLuint vbo, cData *v, int dx, int dy, float dt);
|
|
|
|
// Advances the simulation by one time step (DT) by running the four
// simulation stages in order on the device-side fields.
void simulateFluids(void) {
  // The advection/update stages take the component buffers as float arrays.
  float *vxAsFloat = (float *)vxfield;
  float *vyAsFloat = (float *)vyfield;

  advectVelocity(dvfield, vxAsFloat, vyAsFloat, DIM, RPADW, DIM, DT);
  diffuseProject(vxfield, vyfield, CPADW, DIM, DT, VIS);
  updateVelocity(dvfield, vxAsFloat, vyAsFloat, DIM, RPADW, DIM);

  // Finally move the particles stored in the GL vertex buffer.
  advectParticles(vbo, dvfield, DIM, DIM, DT);
}
|
|
|
|
void display(void) {
|
|
if (!ref_file) {
|
|
sdkStartTimer(&timer);
|
|
simulateFluids();
|
|
}
|
|
|
|
// render points from vertex buffer
|
|
glClear(GL_COLOR_BUFFER_BIT);
|
|
glColor4f(0, 1, 0, 0.5f);
|
|
glPointSize(1);
|
|
glEnable(GL_POINT_SMOOTH);
|
|
glEnable(GL_BLEND);
|
|
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
|
|
glEnableClientState(GL_VERTEX_ARRAY);
|
|
glDisable(GL_DEPTH_TEST);
|
|
glDisable(GL_CULL_FACE);
|
|
glBindBuffer(GL_ARRAY_BUFFER, vbo);
|
|
glVertexPointer(2, GL_FLOAT, 0, NULL);
|
|
glDrawArrays(GL_POINTS, 0, DS);
|
|
glBindBuffer(GL_ARRAY_BUFFER, 0);
|
|
glDisableClientState(GL_VERTEX_ARRAY);
|
|
glDisableClientState(GL_TEXTURE_COORD_ARRAY);
|
|
glDisable(GL_TEXTURE_2D);
|
|
|
|
if (ref_file) {
|
|
return;
|
|
}
|
|
|
|
// Finish timing before swap buffers to avoid refresh sync
|
|
sdkStopTimer(&timer);
|
|
glutSwapBuffers();
|
|
|
|
fpsCount++;
|
|
|
|
if (fpsCount == fpsLimit) {
|
|
char fps[256];
|
|
float ifps = 1.f / (sdkGetAverageTimerValue(&timer) / 1000.f);
|
|
sprintf(fps, "Cuda/GL Stable Fluids (%d x %d): %3.1f fps", DIM, DIM, ifps);
|
|
glutSetWindowTitle(fps);
|
|
fpsCount = 0;
|
|
fpsLimit = (int)MAX(ifps, 1.f);
|
|
sdkResetTimer(&timer);
|
|
}
|
|
|
|
glutPostRedisplay();
|
|
}
|
|
|
|
void autoTest(char **argv) {
|
|
CFrameBufferObject *fbo =
|
|
new CFrameBufferObject(wWidth, wHeight, 4, false, GL_TEXTURE_2D);
|
|
g_CheckRender = new CheckFBO(wWidth, wHeight, 4, fbo);
|
|
g_CheckRender->setPixelFormat(GL_RGBA);
|
|
g_CheckRender->setExecPath(argv[0]);
|
|
g_CheckRender->EnableQAReadback(true);
|
|
|
|
fbo->bindRenderPath();
|
|
|
|
reshape(wWidth, wHeight);
|
|
|
|
for (int count = 0; count < g_iFrameToCompare; count++) {
|
|
simulateFluids();
|
|
|
|
// add in a little force so the automated testing is interesting.
|
|
if (ref_file) {
|
|
int x = wWidth / (count + 1);
|
|
int y = wHeight / (count + 1);
|
|
float fx = (x / (float)wWidth);
|
|
float fy = (y / (float)wHeight);
|
|
int nx = (int)(fx * DIM);
|
|
int ny = (int)(fy * DIM);
|
|
|
|
int ddx = 35;
|
|
int ddy = 35;
|
|
fx = ddx / (float)wWidth;
|
|
fy = ddy / (float)wHeight;
|
|
int spy = ny - FR;
|
|
int spx = nx - FR;
|
|
|
|
addForces(dvfield, DIM, DIM, spx, spy, FORCE * DT * fx, FORCE * DT * fy,
|
|
FR);
|
|
lastx = x;
|
|
lasty = y;
|
|
}
|
|
}
|
|
|
|
display();
|
|
|
|
fbo->unbindRenderPath();
|
|
|
|
// compare to official reference image, printing PASS or FAIL.
|
|
printf("> (Frame %d) Readback BackBuffer\n", 100);
|
|
g_CheckRender->readback(wWidth, wHeight);
|
|
g_CheckRender->savePPM("fluidsGL.ppm", true, NULL);
|
|
|
|
if (!g_CheckRender->PPMvsPPM("fluidsGL.ppm", ref_file, MAX_EPSILON_ERROR,
|
|
0.25f)) {
|
|
g_TotalErrors++;
|
|
}
|
|
}
|
|
|
|
// very simple von neumann middle-square prng. can't use rand() in -qatest
|
|
// mode because its implementation varies across platforms which makes testing
|
|
// for consistency in the important parts of this program difficult.
|
|
float myrand(void) {
|
|
static int seed = 72191;
|
|
char sq[22];
|
|
|
|
if (ref_file) {
|
|
seed *= seed;
|
|
sprintf(sq, "%010d", seed);
|
|
// pull the middle 5 digits out of sq
|
|
sq[8] = 0;
|
|
seed = atoi(&sq[3]);
|
|
|
|
return seed / 99999.f;
|
|
} else {
|
|
return rand() / (float)RAND_MAX;
|
|
}
|
|
}
|
|
|
|
// Fills a dx-by-dy row-major particle array with jittered positions: each
// particle is placed at its cell centre, offset by myrand() - 0.5 cells in
// each axis, then divided by the grid size.
//
// p:  destination array holding at least dx * dy entries
// dx: number of particles per row
// dy: number of rows
void initParticles(cData *p, int dx, int dy) {
  for (int row = 0; row < dy; ++row) {
    cData *cell = p + row * dx;

    for (int col = 0; col < dx; ++col, ++cell) {
      // x is drawn before y so the random sequence is consumed in a fixed
      // order.
      cell->x = (col + 0.5f + (myrand() - 0.5f)) / dx;
      cell->y = (row + 0.5f + (myrand() - 0.5f)) / dy;
    }
  }
}
|
|
|
|
void keyboard(unsigned char key, int x, int y) {
|
|
switch (key) {
|
|
case 27:
|
|
g_bExitESC = true;
|
|
#if defined(__APPLE__) || defined(MACOSX)
|
|
exit(EXIT_SUCCESS);
|
|
#else
|
|
glutDestroyWindow(glutGetWindow());
|
|
return;
|
|
#endif
|
|
break;
|
|
|
|
case 'r':
|
|
memset(hvfield, 0, sizeof(cData) * DS);
|
|
cudaMemcpy(dvfield, hvfield, sizeof(cData) * DS, cudaMemcpyHostToDevice);
|
|
|
|
initParticles(particles, DIM, DIM);
|
|
|
|
cudaGraphicsUnregisterResource(cuda_vbo_resource);
|
|
|
|
getLastCudaError("cudaGraphicsUnregisterBuffer failed");
|
|
|
|
glBindBuffer(GL_ARRAY_BUFFER, vbo);
|
|
glBufferData(GL_ARRAY_BUFFER, sizeof(cData) * DS, particles,
|
|
GL_DYNAMIC_DRAW_ARB);
|
|
glBindBuffer(GL_ARRAY_BUFFER, 0);
|
|
|
|
cudaGraphicsGLRegisterBuffer(&cuda_vbo_resource, vbo,
|
|
cudaGraphicsMapFlagsNone);
|
|
|
|
getLastCudaError("cudaGraphicsGLRegisterBuffer failed");
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
// GLUT mouse-button callback: records where the event happened and flips the
// drag flag on every button event.
//
// button, updown: GLUT button identifier and state (not inspected here)
// x, y:           mouse position in window coordinates
void click(int button, int updown, int x, int y) {
  clicked = clicked ? 0 : 1;
  lasty = y;
  lastx = x;
}
|
|
|
|
void motion(int x, int y) {
|
|
// Convert motion coordinates to domain
|
|
float fx = (lastx / (float)wWidth);
|
|
float fy = (lasty / (float)wHeight);
|
|
int nx = (int)(fx * DIM);
|
|
int ny = (int)(fy * DIM);
|
|
|
|
if (clicked && nx < DIM - FR && nx > FR - 1 && ny < DIM - FR && ny > FR - 1) {
|
|
int ddx = x - lastx;
|
|
int ddy = y - lasty;
|
|
fx = ddx / (float)wWidth;
|
|
fy = ddy / (float)wHeight;
|
|
int spy = ny - FR;
|
|
int spx = nx - FR;
|
|
addForces(dvfield, DIM, DIM, spx, spy, FORCE * DT * fx, FORCE * DT * fy,
|
|
FR);
|
|
lastx = x;
|
|
lasty = y;
|
|
}
|
|
|
|
glutPostRedisplay();
|
|
}
|
|
|
|
// GLUT reshape callback: stores the new window size, resizes the viewport and
// installs an orthographic projection over the unit square, then requests a
// redraw.
//
// x, y: new window width and height in pixels
void reshape(int x, int y) {
  // Remember the size; the mouse handlers normalise coordinates with it.
  wHeight = y;
  wWidth = x;

  glViewport(0, 0, x, y);

  // Projection: left = 0, right = 1, bottom = 1, top = 0.
  glMatrixMode(GL_PROJECTION);
  glLoadIdentity();
  glOrtho(0, 1, 1, 0, 0, 1);

  // Model-view: identity.
  glMatrixMode(GL_MODELVIEW);
  glLoadIdentity();

  glutPostRedisplay();
}
|
|
|
|
// Releases everything main() set up: the CUDA registration of the vertex
// buffer, the texture, host and device buffers, the CUFFT plans, the GL
// vertex buffer and the timer.
void cleanup(void) {
  // Drop the CUDA-side handle to the vertex buffer before the buffer itself
  // is deleted further down.
  cudaGraphicsUnregisterResource(cuda_vbo_resource);

  deleteTexture();

  // Host allocations.
  free(hvfield);
  free(particles);

  // Device allocations.
  cudaFree(dvfield);
  cudaFree(vxfield);
  cudaFree(vyfield);

  // CUFFT plans.
  cufftDestroy(planr2c);
  cufftDestroy(planc2r);

  // GL vertex buffer: unbind first, then delete.
  glBindBuffer(GL_ARRAY_BUFFER, 0);
  glDeleteBuffers(1, &vbo);

  sdkDeleteTimer(&timer);
}
|
|
|
|
int initGL(int *argc, char **argv) {
|
|
glutInit(argc, argv);
|
|
glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE);
|
|
glutInitWindowSize(wWidth, wHeight);
|
|
glutCreateWindow("Compute Stable Fluids");
|
|
glutDisplayFunc(display);
|
|
glutKeyboardFunc(keyboard);
|
|
glutMouseFunc(click);
|
|
glutMotionFunc(motion);
|
|
glutReshapeFunc(reshape);
|
|
|
|
if (!isGLVersionSupported(1, 5)) {
|
|
fprintf(stderr, "ERROR: Support for OpenGL 1.5 is missing");
|
|
fflush(stderr);
|
|
return false;
|
|
}
|
|
|
|
if (!areGLExtensionsSupported("GL_ARB_vertex_buffer_object")) {
|
|
fprintf(stderr, "ERROR: Support for necessary OpenGL extensions missing.");
|
|
fflush(stderr);
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
int main(int argc, char **argv) {
|
|
int devID;
|
|
cudaDeviceProp deviceProps;
|
|
|
|
#if defined(__linux__)
|
|
char *Xstatus = getenv("DISPLAY");
|
|
if (Xstatus == NULL) {
|
|
printf("Waiving execution as X server is not running\n");
|
|
exit(EXIT_WAIVED);
|
|
}
|
|
setenv("DISPLAY", ":0", 0);
|
|
#endif
|
|
|
|
printf("%s Starting...\n\n", sSDKname);
|
|
|
|
printf(
|
|
"NOTE: The CUDA Samples are not meant for performance measurements. "
|
|
"Results may vary when GPU Boost is enabled.\n\n");
|
|
|
|
// First initialize OpenGL context, so we can properly set the GL for CUDA.
|
|
// This is necessary in order to achieve optimal performance with OpenGL/CUDA
|
|
// interop.
|
|
if (false == initGL(&argc, argv)) {
|
|
exit(EXIT_SUCCESS);
|
|
}
|
|
|
|
// use command-line specified CUDA device, otherwise use device with highest
|
|
// Gflops/s
|
|
devID = findCudaDevice(argc, (const char **)argv);
|
|
|
|
// get number of SMs on this GPU
|
|
checkCudaErrors(cudaGetDeviceProperties(&deviceProps, devID));
|
|
printf("CUDA device [%s] has %d Multi-Processors\n", deviceProps.name,
|
|
deviceProps.multiProcessorCount);
|
|
|
|
// automated build testing harness
|
|
if (checkCmdLineFlag(argc, (const char **)argv, "file")) {
|
|
getCmdLineArgumentString(argc, (const char **)argv, "file", &ref_file);
|
|
}
|
|
|
|
// Allocate and initialize host data
|
|
GLint bsize;
|
|
|
|
sdkCreateTimer(&timer);
|
|
sdkResetTimer(&timer);
|
|
|
|
hvfield = (cData *)malloc(sizeof(cData) * DS);
|
|
memset(hvfield, 0, sizeof(cData) * DS);
|
|
|
|
// Allocate and initialize device data
|
|
cudaMallocPitch((void **)&dvfield, &tPitch, sizeof(cData) * DIM, DIM);
|
|
|
|
cudaMemcpy(dvfield, hvfield, sizeof(cData) * DS, cudaMemcpyHostToDevice);
|
|
// Temporary complex velocity field data
|
|
cudaMalloc((void **)&vxfield, sizeof(cData) * PDS);
|
|
cudaMalloc((void **)&vyfield, sizeof(cData) * PDS);
|
|
|
|
setupTexture(DIM, DIM);
|
|
|
|
// Create particle array
|
|
particles = (cData *)malloc(sizeof(cData) * DS);
|
|
memset(particles, 0, sizeof(cData) * DS);
|
|
|
|
initParticles(particles, DIM, DIM);
|
|
|
|
// Create CUFFT transform plan configuration
|
|
checkCudaErrors(cufftPlan2d(&planr2c, DIM, DIM, CUFFT_R2C));
|
|
checkCudaErrors(cufftPlan2d(&planc2r, DIM, DIM, CUFFT_C2R));
|
|
|
|
glGenBuffers(1, &vbo);
|
|
glBindBuffer(GL_ARRAY_BUFFER, vbo);
|
|
glBufferData(GL_ARRAY_BUFFER, sizeof(cData) * DS, particles,
|
|
GL_DYNAMIC_DRAW_ARB);
|
|
|
|
glGetBufferParameteriv(GL_ARRAY_BUFFER, GL_BUFFER_SIZE, &bsize);
|
|
|
|
if (bsize != (sizeof(cData) * DS)) goto EXTERR;
|
|
|
|
glBindBuffer(GL_ARRAY_BUFFER, 0);
|
|
|
|
checkCudaErrors(cudaGraphicsGLRegisterBuffer(&cuda_vbo_resource, vbo,
|
|
cudaGraphicsMapFlagsNone));
|
|
getLastCudaError("cudaGraphicsGLRegisterBuffer failed");
|
|
|
|
if (ref_file) {
|
|
autoTest(argv);
|
|
cleanup();
|
|
|
|
printf("[fluidsGL] - Test Results: %d Failures\n", g_TotalErrors);
|
|
exit(g_TotalErrors == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
|
|
|
|
} else {
|
|
#if defined(__APPLE__) || defined(MACOSX)
|
|
atexit(cleanup);
|
|
#else
|
|
glutCloseFunc(cleanup);
|
|
#endif
|
|
glutMainLoop();
|
|
}
|
|
|
|
if (!ref_file) {
|
|
exit(EXIT_SUCCESS);
|
|
}
|
|
|
|
return 0;
|
|
|
|
EXTERR:
|
|
printf("Failed to initialize GL extensions.\n");
|
|
|
|
exit(EXIT_FAILURE);
|
|
}
|