// Source: cuda-samples/Samples/Mandelbrot/Mandelbrot.cpp
// (1273 lines, 35 KiB, C++; revision dated 2021-10-21 19:04:49 +08:00)
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
Mandelbrot sample
submitted by Mark Granger, NewTek
CUDA 2.0 SDK - updated with double precision support
CUDA 2.1 SDK - updated to demonstrate software block scheduling using
atomics
CUDA 2.2 SDK - updated with drawing of Julia sets by Konstantin Kolchin,
NVIDIA
*/
// OpenGL Graphics includes
#include <helper_gl.h>
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
#include <GL/wglew.h>
#endif
#if defined(__APPLE__) || defined(__MACOSX)
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
#include <GLUT/glut.h>
#ifndef glutCloseFunc
#define glutCloseFunc glutWMCloseFunc
#endif
#else
#include <GL/freeglut.h>
#endif
// CUDA runtime
// CUDA utilities and system includes
#include <cuda_runtime.h>
#include <cuda_gl_interop.h>
#include <helper_functions.h>
#include <helper_cuda.h>
// Includes
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cstdio>
#include "Mandelbrot_kernel.h"
#include "Mandelbrot_gold.h"
// Epsilon handed to sdkComparePPM when runSingleTest compares the rendered
// image with the reference image.
#define MAX_EPSILON_ERROR 5.0f
// Names of the files that are to be saved and of the reference images used for
// validation. Each list is NULL-terminated.
const char *sOriginal[] = {"mandelbrot.ppm", "julia.ppm", NULL};
const char *sReference[] = {"Mandelbrot_fp32.ppm", "Mandelbrot_fp64.ppm", NULL};
const char *sReferenceJulia[] = {"referenceJulia_fp32.ppm",
"referenceJulia_fp64.ppm", NULL};
// true while the Julia set (rather than the Mandelbrot set) is displayed
bool g_isJuliaSet = false;
// When true (or when the Mandelbrot set is shown) a left-button drag pans the
// view; otherwise it changes the Julia parameter (see displayFunc).
bool g_isMoving = true;
// true selects the CPU reference renderer in renderImage
bool g_runCPU = false;
// Shared file handle used when reading/writing params.txt
FILE *stream;
// NOTE(review): not referenced anywhere in the visible code.
char g_ExecPath[300];
// Set to 1 to run on the CPU instead of the GPU for timing comparison.
// NOTE(review): not referenced in the visible code; the CPU path is selected
// at run time through g_runCPU.
#define RUN_CPU 0
// Set to 1 to time frame generation
#define RUN_TIMING 0
// Random number macros
// RANDOMSEED advances `seed` in place with a linear congruential step;
// RANDOMBITS advances `seed` and yields its top `bits` bits.
#define RANDOMSEED(seed) ((seed) = ((seed)*1103515245 + 12345))
#define RANDOMBITS(seed, bits) ((unsigned int)RANDOMSEED(seed) >> (32 - (bits)))
// OpenGL PBO and texture "names"
GLuint gl_PBO, gl_Tex, gl_Shader;
struct cudaGraphicsResource *cuda_pbo_resource; // handles OpenGL-CUDA exchange
// Source image on the host side
uchar4 *h_Src = 0;
// Destination image on the GPU side
uchar4 *d_dst = NULL;
// Original image width and height
int imageW = 800, imageH = 600;
// Starting iteration limit
int crunch = 512;
// Starting position and scale
double xOff = -0.5;
double yOff = 0.0;
double scale = 3.2;
// Starting stationary position and scale motion
// (per-frame increments applied to the offset and scale in displayFunc)
double xdOff = 0.0;
double ydOff = 0.0;
double dscale = 1.0;
// Julia parameter
double xJParam = 0.0;
double yJParam = 0.0;
// Precision mode
// 0=single precision, 1=double single, 2=double
int precisionMode = 0;
// Starting animation frame and anti-aliasing pass
int animationFrame = 0;
int animationStep = 0;
int pass = 0;
// Starting color multipliers and random seed
int colorSeed = 0;
uchar4 colors;
// Timer ID
StopWatchInterface *hTimer = NULL;
// User interface variables
// Last mouse position recorded by clickFunc and the tracked button states
int lastx = 0;
int lasty = 0;
bool leftClicked = false;
bool middleClicked = false;
bool rightClicked = false;
// Controls whether the "Hardware double precision" menu entry is offered
bool haveDoubles = true;
int numSMs = 0; // number of multiprocessors
int version = 1; // Compute Capability
// Auto-Verification Code
const int frameCheckNumber = 60;
int fpsCount = 0; // FPS count for averaging
int fpsLimit = 15; // FPS limit for sampling
unsigned int frameCount = 0;
// Number of failed image comparisons; main derives the exit status from it
unsigned int g_TotalErrors = 0;
// Copies of main's argc/argv
int *pArgc = NULL;
char **pArgv = NULL;
const char *sSDKsample = "CUDA Mandelbrot/Julia Set";
// NOTE(review): MAX_EPSILON is not referenced in the visible code.
#define MAX_EPSILON 50
// Delay between redisplay timer callbacks
#define REFRESH_DELAY 10 // ms
#ifndef MAX
// Larger of two values. Every use of an argument is parenthesized so that
// arguments containing lower-precedence operators (e.g. `a & b`) expand
// correctly. Arguments are evaluated more than once; avoid side effects.
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#endif
// Expresses a byte offset as a pointer value, as expected by GL calls that
// take an offset into the currently bound buffer. The argument is
// parenthesized so expression arguments are added as a whole.
#define BUFFER_DATA(i) ((char *)0 + (i))
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
// This is specifically to enable the application to enable/disable vsync
typedef BOOL(WINAPI *PFNWGLSWAPINTERVALFARPROC)(int);
// Sets the buffer-swap interval through wglSwapIntervalEXT (Windows only).
// `interval` is forwarded unchanged; main passes 0. Does nothing when the
// entry point cannot be resolved.
void setVSync(int interval) {
  if (WGL_EXT_swap_control) {
    wglSwapIntervalEXT =
        (PFNWGLSWAPINTERVALFARPROC)wglGetProcAddress("wglSwapIntervalEXT");
    // wglGetProcAddress yields NULL when the function is unavailable; calling
    // through a NULL pointer would crash, so skip the call in that case.
    if (wglSwapIntervalEXT) {
      wglSwapIntervalEXT(interval);
    }
  }
}
#endif
// Counts one displayed frame. Every `fpsLimit` frames the window title is
// refreshed with the average frame rate measured by hTimer, the sampling
// interval is re-derived from that rate, and the timer is reset.
void computeFPS() {
  frameCount++;
  fpsCount++;
  if (fpsCount == fpsLimit) {
    char fps[256];
    const float avgMs = sdkGetAverageTimerValue(&hTimer);
    // A zero average would make the rate infinite and the later conversion to
    // int undefined; report 0 fps instead.
    const float ifps = (avgMs > 0.0f) ? 1.f / (avgMs / 1000.f) : 0.0f;
    // Bounded formatting so the title can never overrun the buffer.
    snprintf(fps, sizeof(fps), "<CUDA %s Set> %3.1f fps",
             g_isJuliaSet ? "Julia" : "Mandelbrot", ifps);
    glutSetWindowTitle(fps);
    fpsCount = 0;
    // Sample roughly once per second, but at least every frame.
    fpsLimit = (int)MAX(1.f, (float)ifps);
    sdkResetTimer(&hTimer);
  }
}
// Switches to the Julia set and loads its view parameters.
//
// path: file holding five whitespace-separated doubles
//       (xOff yOff scale xJParam yJParam); may be NULL.
// When the file cannot be opened or does not contain five values, built-in
// default parameters are used. Pending motion is cleared and accumulation of
// anti-aliasing passes restarts.
void startJulia(const char *path) {
  g_isJuliaSet = true;
  g_isMoving = false;
  bool loaded = false;
  if ((path != NULL) && ((stream = fopen(path, "r")) != NULL)) {
    // Read into temporaries so a short or malformed file cannot leave the
    // globals partially overwritten.
    double v[5];
    loaded = (fscanf(stream, "%lf %lf %lf %lf %lf", &v[0], &v[1], &v[2], &v[3],
                     &v[4]) == 5);
    fclose(stream);
    if (loaded) {
      xOff = v[0];
      yOff = v[1];
      scale = v[2];
      xJParam = v[3];
      yJParam = v[4];
    } else {
      printf(
          "JuliaSet: params.txt could not be parsed. Using default "
          "parameters\n");
    }
  } else {
    printf(
        "JuliaSet: params.txt could not be opened. Using default "
        "parameters\n");
  }
  if (!loaded) {
    xOff = -0.085760;
    yOff = 0.007040;
    scale = 3.200000;
    xJParam = -0.172400;
    yJParam = -0.652693;
  }
  xdOff = 0.0;
  ydOff = 0.0;
  dscale = 1.0;
  pass = 0;
}
// Get a sub-pixel sample location
//
// sampleIndex: index of the anti-aliasing sample. Only the low 7 bits are
//              used, so any int maps onto the 128-entry table (values 0..127
//              behave exactly as a direct lookup).
// x, y:        receive the sample position inside the pixel, each computed as
//              (table value + 0.5) / 128.
void GetSample(int sampleIndex, float &x, float &y) {
  static const unsigned char pairData[128][2] = {
      {64, 64},   {0, 0},     {1, 63},    {63, 1},    {96, 32},  {97, 95},
      {36, 96},   {30, 31},   {95, 127},  {4, 97},    {33, 62},  {62, 33},
      {31, 126},  {67, 99},   {99, 65},   {2, 34},    {81, 49},  {19, 80},
      {113, 17},  {112, 112}, {80, 16},   {115, 81},  {46, 15},  {82, 79},
      {48, 78},   {16, 14},   {49, 113},  {114, 48},  {45, 45},  {18, 47},
      {20, 109},  {79, 115},  {65, 82},   {52, 94},   {15, 124}, {94, 111},
      {61, 18},   {47, 30},   {83, 100},  {98, 50},   {110, 2},  {117, 98},
      {50, 59},   {77, 35},   {3, 114},   {5, 77},    {17, 66},  {32, 13},
      {127, 20},  {34, 76},   {35, 110},  {100, 12},  {116, 67}, {66, 46},
      {14, 28},   {23, 93},   {102, 83},  {86, 61},   {44, 125}, {76, 3},
      {109, 36},  {6, 51},    {75, 89},   {91, 21},   {60, 117}, {29, 43},
      {119, 29},  {74, 70},   {126, 87},  {93, 75},   {71, 24},  {106, 102},
      {108, 58},  {89, 9},    {103, 23},  {72, 56},   {120, 8},  {88, 40},
      {11, 88},   {104, 120}, {57, 105},  {118, 122}, {53, 6},   {125, 44},
      {43, 68},   {58, 73},   {24, 22},   {22, 5},    {40, 86},  {122, 108},
      {87, 90},   {56, 42},   {70, 121},  {8, 7},     {37, 52},  {25, 55},
      {69, 11},   {10, 106},  {12, 38},   {26, 69},   {27, 116}, {38, 25},
      {59, 54},   {107, 72},  {121, 57},  {39, 37},   {73, 107}, {85, 123},
      {28, 103},  {123, 74},  {55, 85},   {101, 41},  {42, 104}, {84, 27},
      {111, 91},  {9, 19},    {21, 39},   {90, 53},   {41, 60},  {54, 26},
      {92, 119},  {51, 71},   {124, 101}, {68, 92},   {78, 10},  {13, 118},
      {7, 84},    {105, 4}};
  // Mask instead of trusting the caller: an out-of-range index would read
  // past the end of the table.
  const unsigned int slot = (unsigned int)sampleIndex & 127u;
  x = (1.0f / 128.0f) * (0.5f + (float)pairData[slot][0]);
  y = (1.0f / 128.0f) * (0.5f + (float)pairData[slot][1]);
} // GetSample
// render Mandelbrot image using CUDA or CPU
void renderImage(bool bUseOpenGL, bool fp64, int mode) {
#if RUN_TIMING
pass = 0;
#endif
if (pass < 128) {
if (g_runCPU) {
int startPass = pass;
float xs, ys;
sdkResetTimer(&hTimer);
if (bUseOpenGL) {
// DEPRECATED: checkCudaErrors(cudaGLMapBufferObject((void**)&d_dst,
// gl_PBO));
checkCudaErrors(cudaGraphicsMapResources(1, &cuda_pbo_resource, 0));
size_t num_bytes;
checkCudaErrors(cudaGraphicsResourceGetMappedPointer(
(void **)&d_dst, &num_bytes, cuda_pbo_resource));
}
// Get the anti-alias sub-pixel sample location
GetSample(pass & 127, xs, ys);
// Get the pixel scale and offset
double s = scale / (double)imageW;
double x = (xs - (double)imageW * 0.5f) * s + xOff;
double y = (ys - (double)imageH * 0.5f) * s + yOff;
// Run the mandelbrot generator
// Use the adaptive sampling version when animating.
if (pass && !startPass) {
if (precisionMode)
RunMandelbrotDSGold1(h_Src, imageW, imageH, crunch, x, y, xJParam,
yJParam, s, colors, pass++, animationFrame,
g_isJuliaSet);
else
RunMandelbrotGold1(h_Src, imageW, imageH, crunch, (float)x, (float)y,
(float)xJParam, (float)yJParam, (float)s, colors,
pass++, animationFrame, g_isJuliaSet);
} else {
if (precisionMode)
RunMandelbrotDSGold0(h_Src, imageW, imageH, crunch, x, y, xJParam,
yJParam, s, colors, pass++, animationFrame,
g_isJuliaSet);
else
RunMandelbrotGold0(h_Src, imageW, imageH, crunch, (float)x, (float)y,
(float)xJParam, (float)yJParam, (float)s, colors,
pass++, animationFrame, g_isJuliaSet);
}
checkCudaErrors(cudaMemcpy(d_dst, h_Src, imageW * imageH * sizeof(uchar4),
cudaMemcpyHostToDevice));
if (bUseOpenGL) {
// DEPRECATED: checkCudaErrors(cudaGLUnmapBufferObject(gl_PBO));
checkCudaErrors(cudaGraphicsUnmapResources(1, &cuda_pbo_resource, 0));
}
#if RUN_TIMING
printf("CPU = %5.8f\n", 0.001f * sdkGetTimerValue(&hTimer));
#endif
} else { // this is the GPU Path
float timeEstimate;
int startPass = pass;
sdkResetTimer(&hTimer);
if (bUseOpenGL) {
// DEPRECATED: checkCudaErrors(cudaGLMapBufferObject((void**)&d_dst,
// gl_PBO));
checkCudaErrors(cudaGraphicsMapResources(1, &cuda_pbo_resource, 0));
size_t num_bytes;
checkCudaErrors(cudaGraphicsResourceGetMappedPointer(
(void **)&d_dst, &num_bytes, cuda_pbo_resource));
}
// Render anti-aliasing passes until we run out time (60fps approximately)
do {
float xs, ys;
// Get the anti-alias sub-pixel sample location
GetSample(pass & 127, xs, ys);
// Get the pixel scale and offset
double s = scale / (float)imageW;
double x = (xs - (double)imageW * 0.5f) * s + xOff;
double y = (ys - (double)imageH * 0.5f) * s + yOff;
// Run the mandelbrot generator
// Use the adaptive sampling version when animating.
if (pass && !startPass)
RunMandelbrot1(d_dst, imageW, imageH, crunch, x, y, xJParam, yJParam,
s, colors, pass++, animationFrame, precisionMode,
numSMs, g_isJuliaSet, version);
else
RunMandelbrot0(d_dst, imageW, imageH, crunch, x, y, xJParam, yJParam,
s, colors, pass++, animationFrame, precisionMode,
numSMs, g_isJuliaSet, version);
// Estimate the total time of the frame if one more pass is rendered
timeEstimate =
0.1f * sdkGetTimerValue(&hTimer) *
((float)(pass + 1 - startPass) / (float)(pass - startPass));
} while ((pass < 128) && (timeEstimate < 1.0f / 60.0f) && !RUN_TIMING);
if (bUseOpenGL) {
// DEPRECATED: checkCudaErrors(cudaGLUnmapBufferObject(gl_PBO));
checkCudaErrors(cudaGraphicsUnmapResources(1, &cuda_pbo_resource, 0));
}
#if RUN_TIMING
printf("GPU = %5.8f\n", 0.001f * sdkGetTimerValue(&hTimer);
#endif
}
}
}
// OpenGL display function
//
// Applies the pending pan / zoom / color-animation increments, renders the
// image into the PBO, draws it as a textured unit quad and updates the frame
// rate statistics. Any applied increment restarts pass accumulation.
void displayFunc(void) {
  sdkStartTimer(&hTimer);

  // Pan: the increment goes to the view offset or, while a Julia set is shown
  // and "move" mode is off, to the Julia parameter.
  const bool hasPan = (xdOff != 0.0) || (ydOff != 0.0);
  if (hasPan) {
    const bool panView = g_isMoving || !g_isJuliaSet;
    double &targetX = panView ? xOff : xJParam;
    double &targetY = panView ? yOff : yJParam;
    targetX += xdOff;
    targetY += ydOff;
    pass = 0;
  }

  // Zoom
  if (dscale != 1.0) {
    scale *= dscale;
    pass = 0;
  }

  // Color animation
  if (animationStep) {
    animationFrame -= animationStep;
    pass = 0;
  }

  // render the Mandelbrot image
  renderImage(true, g_isJuliaSet, precisionMode);

  // load texture from PBO
  // glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, gl_PBO);
  glBindTexture(GL_TEXTURE_2D, gl_Tex);
  glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, imageW, imageH, GL_RGBA,
                  GL_UNSIGNED_BYTE, BUFFER_DATA(0));
  // glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);

  // fragment program is required to display floating point texture
  glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, gl_Shader);
  glEnable(GL_FRAGMENT_PROGRAM_ARB);
  glDisable(GL_DEPTH_TEST);

  // Unit quad; texture coordinates coincide with vertex positions.
  static const float kCorners[4][2] = {
      {0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}, {0.0f, 1.0f}};
  glBegin(GL_QUADS);
  for (int corner = 0; corner < 4; ++corner) {
    glTexCoord2f(kCorners[corner][0], kCorners[corner][1]);
    glVertex2f(kCorners[corner][0], kCorners[corner][1]);
  }
  glEnd();

  glBindTexture(GL_TEXTURE_2D, 0);
  glDisable(GL_FRAGMENT_PROGRAM_ARB);

  sdkStopTimer(&hTimer);
  glutSwapBuffers();
  computeFPS();
} // displayFunc
// Releases everything the interactive path created: the host image, the
// frame timer, the CUDA registration of the PBO and the GL objects.
void cleanup() {
  // free(NULL) is a no-op, so no guard is needed.
  free(h_Src);
  h_Src = NULL;

  sdkStopTimer(&hTimer);
  sdkDeleteTimer(&hTimer);

  // DEPRECATED: checkCudaErrors(cudaGLUnregisterBufferObject(gl_PBO));
  checkCudaErrors(cudaGraphicsUnregisterResource(cuda_pbo_resource));

  // Unbind the PBO before deleting the GL objects.
  glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
  glDeleteBuffers(1, &gl_PBO);
  glDeleteTextures(1, &gl_Tex);
  glDeleteProgramsARB(1, &gl_Shader);
}
void initMenus();

// Restores the default color multipliers.
static void resetColorMultipliers() {
  colors.x = 3;
  colors.y = 5;
  colors.z = 7;
}

// Derives the color multipliers from colorSeed (defaults when the seed is 0)
// and restarts pass accumulation.
static void applyColorSeed() {
  int seed = colorSeed;
  if (seed) {
    // Each RANDOMBITS call advances the local seed, so the three channels
    // receive successive values.
    colors.x = RANDOMBITS(seed, 4);
    colors.y = RANDOMBITS(seed, 4);
    colors.z = RANDOMBITS(seed, 4);
  } else {
    resetColorMultipliers();
  }
  pass = 0;
}

// Switches to the Mandelbrot set with the default view, colors, detail and
// animation state, and restarts pass accumulation.
static void resetMandelbrotView() {
  g_isJuliaSet = false;
  g_isMoving = true;
  xOff = -0.5;
  yOff = 0.0;
  scale = 3.2;
  xdOff = 0.0;
  ydOff = 0.0;
  dscale = 1.0;
  colorSeed = 0;
  resetColorMultipliers();
  crunch = 512;
  animationFrame = 0;
  animationStep = 0;
  pass = 0;
}

// OpenGL keyboard function
//
// k: the key that was pressed; the two int parameters (mouse position) are
//    unused. Every key that changes what is rendered resets `pass` so that
//    anti-aliasing accumulation restarts.
void keyboardFunc(unsigned char k, int, int) {
  switch (k) {
    case '\033':
    case 'q':
    case 'Q':
      printf("Shutting down...\n");
#if defined(__APPLE__) || defined(MACOSX)
      exit(EXIT_SUCCESS);
#else
      glutDestroyWindow(glutGetWindow());
      return;
#endif
      break;

    case '?':
      printf("xOff = %5.8f\n", xOff);
      printf("yOff = %5.8f\n", yOff);
      printf("scale = %e\n", scale);
      printf("detail = %d\n", crunch);
      printf("color = %d\n", colorSeed);
      printf("xJParam = %5.8f\n", xJParam);
      printf("yJParam = %5.8f\n", yJParam);
      printf("\n");
      break;

    case 'e':
    case 'E':
      // Reset all values to their defaults
      g_runCPU = false;
      printf(
          "All parameters are reset to defaults. GPU implementation is "
          "used.\n");
      resetMandelbrotView();
      xJParam = 0.0;
      yJParam = 0.0;
      break;

    case 'c':
      ++colorSeed;
      applyColorSeed();
      break;

    case 'C':
      --colorSeed;
      applyColorSeed();
      break;

    case 'a':
      // Step the animation speed up (a negative speed is first stopped);
      // the speed is capped at 8.
      if (animationStep < 0) {
        animationStep = 0;
      } else {
        animationStep++;
        if (animationStep > 8) {
          animationStep = 8;
        }
      }
      break;

    case 'A':
      // Mirror image of 'a': step towards -8.
      if (animationStep > 0) {
        animationStep = 0;
      } else {
        animationStep--;
        if (animationStep < -8) {
          animationStep = -8;
        }
      }
      break;

    case 'd':
      // Double the iteration limit while it stays within the cap below.
      if (2 * crunch <= MIN(numSMs * (version < 20 ? 512 : 2048), 0x4000)) {
        crunch *= 2;
        pass = 0;
      }
      printf("detail = %d\n", crunch);
      break;

    case 'D':
      if (crunch > 2) {
        crunch /= 2;
        pass = 0;
      }
      printf("detail = %d\n", crunch);
      break;

    case 'r':
      colors.x -= 1;
      pass = 0;
      break;

    case 'R':
      colors.x += 1;
      pass = 0;
      break;

    case 'g':
      colors.y -= 1;
      pass = 0;
      break;

    case 'G':
      colors.y += 1;
      pass = 0;
      break;

    case 'b':
      colors.z -= 1;
      pass = 0;
      break;

    case 'B':
      colors.z += 1;
      pass = 0;
      break;

    case 's':
    case 'S':
      if (g_runCPU) {
        g_runCPU = false;
        printf("GPU implementation\n");
      } else {
        g_runCPU = true;
        printf("CPU implementation\n");
      }
      pass = 0;
      // The precision menu differs between CPU and GPU, so rebuild it.
      glutDestroyMenu(glutGetMenu());
      initMenus();
      break;

    case 'j':
    case 'J': {
      // toggle between Mandelbrot and Julia sets and reset all parameters
      if (!g_isJuliaSet) { // settings for Julia
        g_isJuliaSet = true;
        startJulia("params.txt");
      } else { // settings for Mandelbrot
        resetMandelbrotView();
      }
      char title[30];
      snprintf(title, sizeof(title), "<CUDA %s Set>",
               g_isJuliaSet ? "Julia" : "Mandelbrot");
      glutSetWindowTitle(title);
      break;
    }

    case 'm':
    case 'M':
      if (g_isJuliaSet) {
        g_isMoving = !g_isMoving;
        pass = 0;
      }
      break;

    case 'p':
    case 'P':
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
      if (fopen_s(&stream, "params.txt", "w") != 0)
#else
      if ((stream = fopen("params.txt", "w")) == NULL)
#endif
      {
        printf("The file params.txt was not opened\n");
        break;
      }
      // %.17g keeps every significant digit of a double; a fixed "%f" would
      // store a deeply zoomed-in scale as 0.000000.
      fprintf(stream, "%.17g %.17g %.17g %.17g %.17g\n", xOff, yOff, scale,
              xJParam, yJParam);
      fclose(stream);
      break;

    case 'o':
    case 'O': {
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
      if (fopen_s(&stream, "params.txt", "r") != 0)
#else
      if ((stream = fopen("params.txt", "r")) == NULL)
#endif
      {
        printf("The file params.txt was not opened\n");
        break;
      }
      // Parse into temporaries so a truncated file cannot leave the view
      // parameters half-updated.
      double v[5];
      const int numRead = fscanf(stream, "%lf %lf %lf %lf %lf", &v[0], &v[1],
                                 &v[2], &v[3], &v[4]);
      fclose(stream);
      if (numRead != 5) {
        printf("The file params.txt could not be parsed\n");
        break;
      }
      xOff = v[0];
      yOff = v[1];
      scale = v[2];
      xJParam = v[3];
      yJParam = v[4];
      xdOff = 0.0;
      ydOff = 0.0;
      dscale = 1.0;
      pass = 0;
      break;
    }

    case '4': // Left arrow key
      xOff -= 0.05f * scale;
      pass = 0;
      break;

    case '8': // Up arrow key
      yOff += 0.05f * scale;
      pass = 0;
      break;

    case '6': // Right arrow key
      xOff += 0.05f * scale;
      pass = 0;
      break;

    case '2': // Down arrow key
      yOff -= 0.05f * scale;
      pass = 0;
      break;

    case '+':
      scale /= 1.1f;
      pass = 0;
      break;

    case '-':
      scale *= 1.1f;
      pass = 0;
      break;

    default:
      break;
  }
} // keyboardFunc
// OpenGL mouse click function
//
// Tracks which mouse buttons are held, remembers the click position and
// stops any pan/zoom motion that was in progress.
void clickFunc(int button, int state, int x, int y) {
  // Every event for a button flips that button's tracked state.
  switch (button) {
    case 0:
      leftClicked = !leftClicked;
      break;
    case 1:
      middleClicked = !middleClicked;
      break;
    case 2:
      rightClicked = !rightClicked;
      break;
    default:
      break;
  }

  // Shift + left button acts as the middle button.
  const int activeModifiers = glutGetModifiers();
  if (leftClicked && (activeModifiers & GLUT_ACTIVE_SHIFT)) {
    leftClicked = false;
    middleClicked = true;
  }

  // A release always ends left/middle dragging.
  if (state == GLUT_UP) {
    leftClicked = false;
    middleClicked = false;
  }

  lastx = x;
  lasty = y;

  xdOff = 0.0;
  ydOff = 0.0;
  dscale = 1.0;
} // clickFunc
// OpenGL mouse motion function
//
// Converts the drag distance since the last click into per-frame motion:
// the left button sets the pan increments (xdOff, ydOff), the middle button
// sets the zoom factor (dscale). The zoom factor is kept within
// [1/1.05, 1.05] so a single frame can neither collapse nor invert the scale.
void motionFunc(int x, int y) {
  // Without a valid image size the normalised drag distance is undefined.
  if (imageW <= 0 || imageH <= 0) {
    return;
  }

  double fx = (double)(x - lastx) / 50.0 / (double)(imageW);
  double fy = (double)(lasty - y) / 50.0 / (double)(imageH);

  if (leftClicked) {
    xdOff = fx * scale;
    ydOff = fy * scale;
  } else {
    xdOff = 0.0f;
    ydOff = 0.0f;
  }

  if (middleClicked) {
    const double kMaxStep = 1.05;
    const double kMinStep = 1.0 / 1.05;
    double step;
    if (fy > 0.0f) {
      // Dragging up zooms in (factor below 1).
      step = 1.0 - fy;
    } else {
      // Dragging down zooms out; a non-positive denominator means the drag
      // is beyond the largest step, so use the cap directly.
      const double denom = 1.0 + fy;
      step = (denom > 0.0) ? (1.0 / denom) : kMaxStep;
    }
    // Clamp on both sides: zoom-in is bounded below, zoom-out above.
    if (step < kMinStep) {
      step = kMinStep;
    }
    if (step > kMaxStep) {
      step = kMaxStep;
    }
    dscale = step;
  } else {
    dscale = 1.0;
  }
} // motionFunc
// Periodic GLUT timer callback: while a window exists, requests a redraw
// and re-arms itself after REFRESH_DELAY. `value` is not used.
void timerEvent(int value) {
  if (!glutGetWindow()) {
    return;
  }
  glutPostRedisplay();
  glutTimerFunc(REFRESH_DELAY, timerEvent, 0);
}
// Menu callback: the chosen entry's value becomes the precision mode and
// pass accumulation restarts.
void mainMenu(int i) {
  pass = 0;
  precisionMode = i;
}
// Builds the right-button precision menu. The entries depend on whether the
// CPU or the GPU renderer is active; each entry's value is passed to mainMenu.
void initMenus() {
  glutCreateMenu(mainMenu);

  if (g_runCPU) {
    glutAddMenuEntry("Software single precision", 0);
    glutAddMenuEntry("Software double precision", 1);
  } else {
    glutAddMenuEntry("Hardware single precision", 0);
    // Double-single emulation is only offered with more than 2 SMs.
    if (numSMs > 2) {
      glutAddMenuEntry("Emulated double-single precision", 1);
    }
    if (haveDoubles) {
      glutAddMenuEntry("Hardware double precision", 2);
    }
  }

  glutAttachMenu(GLUT_RIGHT_BUTTON);
}
// gl_Shader for displaying floating-point texture
// ARB fragment program source: a single TEX instruction that samples 2D
// texture unit 0 at the fragment's texture coordinate and writes the result
// to the output color.
static const char *shader_code =
"!!ARBfp1.0\n"
"TEX result.color, fragment.texcoord, texture[0], 2D; \n"
"END";
// Creates an ARB assembly program from source text.
//
// program_type: target passed to the ARB program calls
//               (the caller uses GL_FRAGMENT_PROGRAM_ARB).
// code:         NUL-terminated program source.
// Returns the new program id, or 0 when the driver reports a program error;
// in that case the error is printed to stderr and the program object is
// deleted again so it does not leak.
GLuint compileASMShader(GLenum program_type, const char *code) {
  GLuint program_id;
  glGenProgramsARB(1, &program_id);
  glBindProgramARB(program_type, program_id);
  glProgramStringARB(program_type, GL_PROGRAM_FORMAT_ASCII_ARB,
                     (GLsizei)strlen(code), (GLubyte *)code);

  // An error position of -1 means the program string was accepted.
  GLint error_pos;
  glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &error_pos);

  if (error_pos != -1) {
    const GLubyte *error_string = glGetString(GL_PROGRAM_ERROR_STRING_ARB);
    // glGetString may return NULL; never hand NULL to "%s".
    fprintf(stderr, "Program error at position: %d\n%s\n", (int)error_pos,
            error_string ? (const char *)error_string : "(no error string)");
    glDeleteProgramsARB(1, &program_id);
    return 0;
  }

  return program_id;
}
void initOpenGLBuffers(int w, int h) {
// delete old buffers
if (h_Src) {
free(h_Src);
h_Src = 0;
}
if (gl_Tex) {
glDeleteTextures(1, &gl_Tex);
gl_Tex = 0;
}
if (gl_PBO) {
// DEPRECATED: checkCudaErrors(cudaGLUnregisterBufferObject(gl_PBO));
cudaGraphicsUnregisterResource(cuda_pbo_resource);
glDeleteBuffers(1, &gl_PBO);
gl_PBO = 0;
}
// allocate new buffers
h_Src = (uchar4 *)malloc(w * h * 4);
printf("Creating GL texture...\n");
glEnable(GL_TEXTURE_2D);
glGenTextures(1, &gl_Tex);
glBindTexture(GL_TEXTURE_2D, gl_Tex);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, w, h, 0, GL_RGBA, GL_UNSIGNED_BYTE,
h_Src);
printf("Texture created.\n");
printf("Creating PBO...\n");
glGenBuffers(1, &gl_PBO);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, gl_PBO);
glBufferData(GL_PIXEL_UNPACK_BUFFER_ARB, w * h * 4, h_Src, GL_STREAM_COPY);
// While a PBO is registered to CUDA, it can't be used
// as the destination for OpenGL drawing calls.
// But in our particular case OpenGL is only used
// to display the content of the PBO, specified by CUDA kernels,
// so we need to register/unregister it only once.
// DEPRECATED: checkCudaErrors( cudaGLRegisterBufferObject(gl_PBO) );
checkCudaErrors(cudaGraphicsGLRegisterBuffer(
&cuda_pbo_resource, gl_PBO, cudaGraphicsMapFlagsWriteDiscard));
printf("PBO created.\n");
// load shader program
gl_Shader = compileASMShader(GL_FRAGMENT_PROGRAM_ARB, shader_code);
}
// GLUT reshape callback: resets the viewport and projection to the unit
// square and, for a non-empty window, rebuilds the image buffers at the new
// size. Pass accumulation restarts and a redraw is requested.
void reshapeFunc(int w, int h) {
  glViewport(0, 0, w, h);

  glMatrixMode(GL_MODELVIEW);
  glLoadIdentity();

  glMatrixMode(GL_PROJECTION);
  glLoadIdentity();
  glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0);

  // Do not resize when the window is minimized (width or height of 0).
  // imageW/imageH are only updated together with the buffers so that the
  // renderer never sees dimensions that differ from the allocated buffers
  // (and never divides by a zero width).
  if (w > 0 && h > 0) {
    initOpenGLBuffers(w, h);
    imageW = w;
    imageH = h;
  }

  pass = 0;
  glutPostRedisplay();
}
// Creates the GLUT window, registers all callbacks, builds the menu and
// verifies that the required OpenGL version and extensions are present.
// Exits the process when they are not.
void initGL(int *argc, char **argv) {
  printf("Initializing GLUT...\n");
  glutInit(argc, argv);

  glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE);
  glutInitWindowSize(imageW, imageH);
  glutInitWindowPosition(0, 0);
  glutCreateWindow(argv[0]);

  glutDisplayFunc(displayFunc);
  glutKeyboardFunc(keyboardFunc);
  glutMouseFunc(clickFunc);
  glutMotionFunc(motionFunc);
  glutReshapeFunc(reshapeFunc);
  glutTimerFunc(REFRESH_DELAY, timerEvent, 0);
  initMenus();

  // The extension query only runs when the version check succeeded.
  const char *requiredExtensions =
      "GL_ARB_vertex_buffer_object GL_ARB_pixel_buffer_object";
  const bool glReady = isGLVersionSupported(1, 5) &&
                       areGLExtensionsSupported(requiredExtensions);
  if (!glReady) {
    fprintf(stderr, "Error: failed to get minimal extensions for demo\n");
    fprintf(stderr, "This sample requires:\n");
    fprintf(stderr, " OpenGL version 1.5\n");
    fprintf(stderr, " GL_ARB_vertex_buffer_object\n");
    fprintf(stderr, " GL_ARB_pixel_buffer_object\n");
    exit(EXIT_SUCCESS);
  }

  printf("OpenGL window created.\n");
}
// Selects the CUDA device, records its compute capability and multiprocessor
// count, applies the optional -xOff / -yOff / -scale command-line overrides
// and sets the default color multipliers.
void initData(int argc, char **argv) {
  // check for hardware double precision support
  int dev = 0;
  dev = findCudaDevice(argc, (const char **)argv);

  cudaDeviceProp deviceProp;
  checkCudaErrors(cudaGetDeviceProperties(&deviceProp, dev));
  // Compute capability encoded as major * 10 + minor
  version = deviceProp.major * 10 + deviceProp.minor;
  numSMs = deviceProp.multiProcessorCount;

  // initialize some of the arguments
  if (checkCmdLineFlag(argc, (const char **)argv, "xOff")) {
    xOff = getCmdLineArgumentFloat(argc, (const char **)argv, "xOff");
  }

  if (checkCmdLineFlag(argc, (const char **)argv, "yOff")) {
    yOff = getCmdLineArgumentFloat(argc, (const char **)argv, "yOff");
  }

  if (checkCmdLineFlag(argc, (const char **)argv, "scale")) {
    // Read the value of the "scale" option itself (not "xOff").
    scale = getCmdLineArgumentFloat(argc, (const char **)argv, "scale");
  }

  colors.w = 0;
  colors.x = 3;
  colors.y = 5;
  colors.z = 7;
  printf("Data initialization done.\n");
}
////////////////////////////////////////////////////////////////////////////////
// runAutoTest validates the Mandelbrot and Julia sets without using OpenGL
//
// Renders the selected set into a CUDA buffer, saves it as
// "rendered_image.ppm" and compares it with the reference image named by the
// -file option. A failed or impossible comparison increments g_TotalErrors.
// Always returns a non-zero value; callers inspect g_TotalErrors.
////////////////////////////////////////////////////////////////////////////////
int runSingleTest(int argc, char **argv) {
  char dump_file[256], *ref_file = NULL;
  bool haveDouble = false;

  printf("* Running Automatic Test: <%s>\n", sSDKsample);

  strcpy(dump_file, (const char *)"rendered_image.ppm");
  // We've already determined that file has been passed in as input, we can grab
  // the file here
  getCmdLineArgumentString(argc, (const char **)argv, "file",
                           (char **)&ref_file);
  // The flag may be present without a value; there is nothing to compare
  // against in that case.
  if (ref_file == NULL) {
    fprintf(stderr, "Error: -file requires a reference image name\n");
    g_TotalErrors++;
    return true;
  }

  if (checkCmdLineFlag(argc, (const char **)argv, "fp64")) {
    haveDouble = true;
  }

  // initialize Data for CUDA
  initData(argc, argv);

  // Allocate memory for renderImage (to be able to render into a CUDA memory
  // buffer)
  const size_t numBytes = (size_t)imageW * (size_t)imageH * sizeof(uchar4);
  checkCudaErrors(cudaMalloc((void **)&d_dst, numBytes));

  // Allocate memory for cpu buffer
  unsigned char *h_dst = (unsigned char *)malloc(numBytes);
  if (h_dst == NULL) {
    fprintf(stderr, "Error: failed to allocate the host image buffer\n");
    checkCudaErrors(cudaFree(d_dst));
    g_TotalErrors++;
    return true;
  }

  if (g_isJuliaSet) {
    // startJulia accepts NULL and then falls back to default parameters.
    char *ref_path = sdkFindFilePath("params.txt", argv[0]);
    startJulia(ref_path);
  }

  // Rendering and saving are identical for both sets.
  for (int i = 0; i < 50; i++) {
    renderImage(false, haveDouble, 0);
  }

  checkCudaErrors(cudaMemcpy(h_dst, d_dst, numBytes, cudaMemcpyDeviceToHost));
  sdkSavePPM4ub(dump_file, h_dst, imageW, imageH);

  printf("\n[%s], %s Set, %s -> Saved File\n", dump_file,
         (g_isJuliaSet ? "Julia" : "Mandelbrot"),
         (haveDouble ? "(fp64 double precision)" : "(fp32 single precision)"));

  char *found_ref = sdkFindFilePath(ref_file, argv[0]);
  if (found_ref == NULL) {
    // Do not hand a NULL path to the comparison routine.
    fprintf(stderr, "Error: reference image \"%s\" was not found\n", ref_file);
    g_TotalErrors++;
  } else if (!sdkComparePPM(dump_file, found_ref, MAX_EPSILON_ERROR, 0.15f,
                            false)) {
    printf("Images \"%s\", \"%s\" are different\n", ref_file, dump_file);
    g_TotalErrors++;
  } else {
    printf("Images \"%s\", \"%s\" are matching\n", ref_file, dump_file);
  }

  checkCudaErrors(cudaFree(d_dst));
  free(h_dst);

  return true;
}
// Performance Test
//
// Renders the current view N times with RunMandelbrot0 in precision mode 2
// (double precision) into a temporary device buffer and prints the measured
// throughput in megapixels per second.
void runBenchmark(int argc, char **argv) {
  int N = 1000;
  // initialize Data for CUDA
  initData(argc, argv);

  printf("\n* Run Performance Test\n");
  printf("Image Size %d x %d\n", imageW, imageH);
  printf("Double Precision\n");
  printf("%d Iterations\n", N);

  // Allocate memory for renderImage (to be able to render into a CUDA memory
  // buffer)
  checkCudaErrors(
      cudaMalloc((void **)&d_dst, (imageW * imageH * sizeof(uchar4))));

  float xs, ys;

  // Get the anti-alias sub-pixel sample location
  GetSample(0, xs, ys);

  double s = scale / (float)imageW;
  double x = (xs - (double)imageW * 0.5f) * s + xOff;
  double y = (ys - (double)imageH * 0.5f) * s + yOff;

  // Create Timers
  StopWatchInterface *kernel_timer = NULL;
  sdkCreateTimer(&kernel_timer);
  sdkStartTimer(&kernel_timer);

  // render Mandelbrot set and verify
  for (int i = 0; i < N; i++) {
    RunMandelbrot0(d_dst, imageW, imageH, crunch, x, y, xJParam, yJParam, s,
                   colors, pass++, animationFrame, 2, numSMs, g_isJuliaSet,
                   version);
    // Wait for the pass to finish so the timer covers execution, and surface
    // any error raised while it ran.
    checkCudaErrors(cudaDeviceSynchronize());
  }

  // Stop the timer that was started above (hTimer is not used in this mode).
  sdkStopTimer(&kernel_timer);
  float ExecutionTime = sdkGetTimerValue(&kernel_timer);

  float PixelsPerSecond =
      (float)imageW * (float)imageH * N / (ExecutionTime / 1000.0f);

  printf("\nMegaPixels Per Second %.4f\n", PixelsPerSecond / 1e6);

  checkCudaErrors(cudaFree(d_dst));
  sdkDeleteTimer(&kernel_timer);
}
// Prints the command-line usage summary to stdout.
void printHelp() {
  static const char *const kUsage[] = {
      "[Mandelbrot]\n",
      "\tUsage Parameters\n",
      "\t-device=n (requires to be in non-graphics mode)\n",
      "\t-file=output.ppm (output file for image testing)\n",
      "\t-mode=0,1 (0=Mandelbrot Set, 1=Julia Set)\n",
      "\t-fp64 (run in double precision mode)\n"};
  const size_t numLines = sizeof(kUsage) / sizeof(kUsage[0]);
  for (size_t line = 0; line < numLines; ++line) {
    printf("%s", kUsage[line]);
  }
}
////////////////////////////////////////////////////////////////////////////////
// Main program
//
// Parses the command line and runs one of three modes: the automated image
// test (-file), the benchmark (-benchmark), or the interactive OpenGL viewer.
////////////////////////////////////////////////////////////////////////////////
int main(int argc, char **argv) {
  pArgc = &argc;
  pArgv = argv;
  const char **cargv = (const char **)argv;

#if defined(__linux__)
  setenv("DISPLAY", ":0", 0);
#endif

  printf("[%s] - Starting...\n", sSDKsample);

  // parse command line arguments
  if (checkCmdLineFlag(argc, cargv, "help")) {
    printHelp();
    exit(EXIT_SUCCESS);
  }

  // -mode: any non-zero value selects the Julia set.
  int mode = 0;
  if (checkCmdLineFlag(argc, cargv, "mode")) {
    mode = getCmdLineArgumentInt(argc, cargv, "mode");
  }
  g_isJuliaSet = (mode != 0);

  // Set the initial parameters for either Mandelbrot and Julia sets and reset
  // all parameters
  if (!g_isJuliaSet) {
    // settings for Mandelbrot
    g_isMoving = true;
    xOff = -0.5;
    yOff = 0.0;
    scale = 3.2;
    xdOff = 0.0;
    ydOff = 0.0;
    dscale = 1.0;
    colorSeed = 0;
    colors.x = 3;
    colors.y = 5;
    colors.z = 7;
    crunch = 512;
    animationFrame = 0;
    animationStep = 0;
    pass = 0;
  } else {
    // settings for Julia
    char *ref_path = sdkFindFilePath("params.txt", argv[0]);
    startJulia(ref_path);
  }

  // Each of the following modes terminates the process, so they are checked
  // one after another.
  if (checkCmdLineFlag(argc, cargv, "file")) {
    fpsLimit = frameCheckNumber;

    // use command-line specified CUDA device, otherwise use device with highest
    // Gflops/s
    findCudaDevice(argc, cargv); // no OpenGL usage

    // We run the Automated Testing code path
    runSingleTest(argc, argv);

    exit(g_TotalErrors == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
  }

  if (checkCmdLineFlag(argc, cargv, "benchmark")) {
    // run benchmark
    // use command-line specified CUDA device, otherwise use device with highest
    // Gflops/s
    findCudaDevice(argc, cargv);

    // We run the Automated Performance Test
    runBenchmark(argc, argv);

    exit(g_TotalErrors == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
  }

  // use command-line specified CUDA device, otherwise use device with highest
  // Gflops/s
  if (checkCmdLineFlag(argc, cargv, "device")) {
    printf("[%s]\n", argv[0]);
    printf(" Does not explicitly support -device=n in OpenGL mode\n");
    printf(" To use -device=n, the sample must be running w/o OpenGL\n\n");
    printf(" > %s -device=n -file=<image_name>.ppm\n", argv[0]);
    printf("exiting...\n");
    exit(EXIT_SUCCESS);
  }

  // Otherwise it succeeds, we will continue to run this sample
  initData(argc, argv);

  // Initialize OpenGL context first before the CUDA context is created. This
  // is needed
  // to achieve optimal performance with OpenGL/CUDA interop.
  initGL(&argc, argv);
  initOpenGLBuffers(imageW, imageH);

  printf("Starting GLUT main loop...\n");
  printf("\n");

  // Interactive controls, printed one entry per line.
  static const char *const kControls[] = {
      "Press [s] to toggle between GPU and CPU implementations\n",
      "Press [j] to toggle between Julia and Mandelbrot sets\n",
      "Press [r] or [R] to decrease or increase red color channel\n",
      "Press [g] or [G] to decrease or increase green color channel\n",
      "Press [b] or [B] to decrease or increase blue color channel\n",
      "Press [e] to reset\n",
      "Press [a] or [A] to animate colors\n",
      "Press [c] or [C] to change colors\n",
      "Press [d] or [D] to increase or decrease the detail\n",
      "Press [p] to record main parameters to file params.txt\n",
      "Press [o] to read main parameters from file params.txt\n",
      "Left mouse button + drag = move (Mandelbrot or Julia) or animate "
      "(Julia)\n",
      "Press [m] to toggle between move and animate (Julia) for left mouse "
      "button\n",
      "Middle mouse button + drag = Zoom\n",
      "Right mouse button = Menu\n",
      "Press [?] to print location and scale\n",
      "Press [q] to exit\n",
      "\n"};
  const size_t numControls = sizeof(kControls) / sizeof(kControls[0]);
  for (size_t entry = 0; entry < numControls; ++entry) {
    printf("%s", kControls[entry]);
  }

  sdkCreateTimer(&hTimer);
  sdkStartTimer(&hTimer);

  // Release resources when the window closes (or at process exit on macOS).
#if defined(__APPLE__) || defined(MACOSX)
  atexit(cleanup);
#else
  glutCloseFunc(cleanup);
#endif

#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
  setVSync(0);
#endif

  glutMainLoop();
} // main