cuda-samples/Samples/cdpBezierTessellation/BezierLineCDP.cu
2021-10-21 16:34:49 +05:30

208 lines
6.5 KiB
Plaintext

/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdio.h>
#include <cuda_runtime_api.h>
#include <helper_cuda.h>
#include <string.h>
// Component-wise sum of two 2D vectors (device code only).
__forceinline__ __device__ float2 operator+(float2 a, float2 b) {
  float2 sum = {a.x + b.x, a.y + b.y};
  return sum;
}
// Component-wise difference (a - b) of two 2D vectors (device code only).
__forceinline__ __device__ float2 operator-(float2 a, float2 b) {
  // `a` is a by-value copy, so it can be updated in place and returned.
  a.x -= b.x;
  a.y -= b.y;
  return a;
}
// Scales the 2D vector `b` by the scalar `a` (device code only).
__forceinline__ __device__ float2 operator*(float a, float2 b) {
  float2 scaled = {a * b.x, a * b.y};
  return scaled;
}
// Euclidean length of a 2D vector (device code only).
__forceinline__ __device__ float length(float2 a) {
  const float squaredLength = a.x * a.x + a.y * a.y;
  return sqrtf(squaredLength);
}
// Upper bound on the number of tessellation points generated for one line;
// computeBezierLinesCDP clamps its per-line point count to this value.
#define MAX_TESSELLATION 32
// One Bezier line: three control points plus its tessellated output buffer.
struct BezierLine {
// Control points; computeBezierLinePositions blends them with the three
// basis weights (1-u)^2, 2u(1-u) and u^2.
float2 CP[3];
// Tessellated vertex positions. NULL means "not allocated yet": the buffer is
// allocated with device-side cudaMalloc in computeBezierLinesCDP and released
// in freeVertexMem.
float2 *vertexPos;
// Number of vertices stored in vertexPos (set when the buffer is allocated).
int nVertices;
};
// Child kernel: evaluates the quadratic Bezier curve of line `lidx` at
// `nTessPoints` evenly spaced parameter values and stores the resulting
// positions in bLines[lidx].vertexPos.
//
// Launch layout: 1D grid of 1D blocks. Thread `idx` produces tessellation
// point `idx`; threads with idx >= nTessPoints do nothing, so the grid may be
// rounded up to a whole number of blocks.
//
// Parameters:
//   lidx        - index of the line inside bLines.
//   bLines      - array of lines; bLines[lidx].vertexPos must have room for
//                 at least nTessPoints elements.
//   nTessPoints - number of points to generate along the curve.
__global__ void computeBezierLinePositions(int lidx, BezierLine *bLines,
                                           int nTessPoints) {
  const int idx = threadIdx.x + blockDim.x * blockIdx.x;
  if (idx >= nTessPoints) {
    return;
  }

  BezierLine *line = &bLines[lidx];
  float2 *out = line->vertexPos;
  // No output buffer (never allocated, or the allocation failed): writing
  // would fault, so bail out instead.
  if (out == NULL) {
    return;
  }

  // Map idx onto [0, 1]. With a single tessellation point there is no span to
  // divide by, so pin it to the start of the curve instead of computing 0/0.
  const float u =
      (nTessPoints > 1) ? (float)idx / (float)(nTessPoints - 1) : 0.0f;
  const float omu = 1.0f - u;

  // Quadratic Bernstein basis weights for the three control points.
  const float B3u[3] = {omu * omu, 2.0f * u * omu, u * u};

  float2 position = {0, 0};
  for (int i = 0; i < 3; i++) {
    position = position + B3u[i] * line->CP[i];
  }

  out[idx] = position;
}
// Parent kernel: one thread per Bezier line. Each thread picks a tessellation
// level from the line's curvature, allocates the vertex buffer on first use
// (device-side cudaMalloc), and launches computeBezierLinePositions as a
// child grid to fill it.
//
// Launch layout: 1D grid of 1D blocks covering at least nLines threads;
// surplus threads exit immediately.
//
// Parameters:
//   bLines - device array of nLines lines. A line whose vertexPos is NULL gets
//            a freshly allocated buffer and its nVertices set; a line that
//            already has a buffer is re-tessellated with its stored nVertices.
//   nLines - number of entries in bLines.
__global__ void computeBezierLinesCDP(BezierLine *bLines, int nLines) {
  const int lidx = threadIdx.x + blockDim.x * blockIdx.x;
  if (lidx >= nLines) {
    return;
  }

  BezierLine *line = &bLines[lidx];

  if (line->vertexPos == NULL) {
    // Curvature estimate: distance of the middle control point from the
    // chord midpoint, relative to the chord length.
    const float2 cp0 = line->CP[0];
    const float2 cp1 = line->CP[1];
    const float2 cp2 = line->CP[2];
    const float curvature =
        length(cp1 - 0.5f * (cp0 + cp2)) / length(cp2 - cp0);

    // Clamp in floating point BEFORE converting to int: a zero-length chord
    // makes the ratio inf or NaN, and converting those to int is not
    // well-defined. fmaxf returns the non-NaN operand, so NaN maps to the
    // lower bound and inf to the upper bound.
    const float clamped =
        fminf(fmaxf(curvature * 16.0f, 4.0f), (float)MAX_TESSELLATION);
    const int nTessPoints = (int)clamped;

    float2 *buffer = NULL;
    const cudaError_t status =
        cudaMalloc((void **)&buffer, nTessPoints * sizeof(float2));
    if (status != cudaSuccess || buffer == NULL) {
      // Allocation failed: leave the line marked as unallocated and skip the
      // child launch rather than writing through an invalid pointer.
      line->vertexPos = NULL;
      line->nVertices = 0;
      return;
    }
    line->vertexPos = buffer;
    line->nVertices = nTessPoints;
  }

  const int nVertices = line->nVertices;
  // A grid with zero blocks is an invalid launch configuration.
  if (nVertices <= 0) {
    return;
  }

  const int threadsPerBlock = 32;
  const int nBlocks = (nVertices + threadsPerBlock - 1) / threadsPerBlock;
  computeBezierLinePositions<<<nBlocks, threadsPerBlock>>>(lidx, bLines,
                                                           nVertices);
}
// Releases the per-line vertex buffers with device-side cudaFree.
//
// Launch layout: 1D grid of 1D blocks covering at least nLines threads; one
// thread per line, surplus threads do nothing.
//
// After the call every processed line has vertexPos == NULL and
// nVertices == 0, so the array can safely be handed to
// computeBezierLinesCDP again (it re-allocates when it sees NULL).
__global__ void freeVertexMem(BezierLine *bLines, int nLines) {
  const int lidx = threadIdx.x + blockDim.x * blockIdx.x;
  if (lidx >= nLines) {
    return;
  }

  BezierLine *line = &bLines[lidx];
  if (line->vertexPos != NULL) {
    cudaFree(line->vertexPos);
    // Clear the stale pointer so later passes do not reuse freed memory.
    line->vertexPos = NULL;
    line->nVertices = 0;
  }
}
// True when the device reports compute capability 3.5 or higher, which this
// sample treats as the requirement for CUDA Dynamic Parallelism.
static bool isSM35OrHigher(const cudaDeviceProp &properties) {
  return properties.major > 3 ||
         (properties.major == 3 && properties.minor >= 5);
}

// Finds a GPU with compute capability >= 3.5 and makes it the current device.
//
// If "device" is given on the command line, only that device is considered;
// otherwise the first capable device in enumeration order is chosen.
//
// Returns:
//   EXIT_SUCCESS - a capable device was found and selected with cudaSetDevice.
//   EXIT_FAILURE - the explicitly requested device is not capable.
//   EXIT_WAIVED  - no capable device exists.
unsigned int checkCapableSM35Device(int argc, char **argv) {
  int device = -1;

  if (checkCmdLineFlag(argc, (const char **)argv, "device")) {
    device = getCmdLineArgumentInt(argc, (const char **)argv, "device");

    cudaDeviceProp properties;
    checkCudaErrors(cudaGetDeviceProperties(&properties, device));

    if (!isSM35OrHigher(properties)) {
      printf(
          "cdpBezierTessellation requires GPU devices with compute SM 3.5 or "
          "higher. ");
      printf("Current GPU device has compute SM %d.%d. Exiting...\n",
             properties.major, properties.minor);
      return EXIT_FAILURE;
    }
    printf("Running on GPU %d (%s)\n", device, properties.name);
  } else {
    int device_count = 0;
    checkCudaErrors(cudaGetDeviceCount(&device_count));

    for (int i = 0; i < device_count; ++i) {
      cudaDeviceProp properties;
      checkCudaErrors(cudaGetDeviceProperties(&properties, i));

      if (isSM35OrHigher(properties)) {
        device = i;
        printf("Running on GPU %d (%s)\n", i, properties.name);
        break;
      }
      printf("GPU %d %s does not support CUDA Dynamic Parallelism\n", i,
             properties.name);
    }
  }

  if (device == -1) {
    fprintf(stderr,
            "cdpBezierTessellation requires GPU devices with compute SM 3.5 or "
            "higher. Exiting...\n");
    return EXIT_WAIVED;
  }

  // Actually select the device that was just reported; without this, all
  // later allocations and launches go to the default device (device 0).
  checkCudaErrors(cudaSetDevice(device));
  return EXIT_SUCCESS;
}
// Number of Bezier lines that main creates and processes.
#define N_LINES 256
// Threads per block for the top-level kernel launches issued from main.
#define BLOCK_DIM 64
// Builds N_LINES randomly shaped, end-to-end connected Bezier lines on the
// host, tessellates them on the GPU with the dynamic-parallelism kernel, then
// frees all device memory. Exits with the status from checkCapableSM35Device
// when no suitable GPU is available.
int main(int argc, char **argv) {
  // Validate the GPU first so nothing is allocated if we have to bail out.
  unsigned int sm35Ret = checkCapableSM35Device(argc, argv);
  if (sm35Ret != EXIT_SUCCESS) {
    exit(sm35Ret);
  }

  BezierLine *bLines_h = new BezierLine[N_LINES];

  // Each line starts where the previous one ended; the other two control
  // points are random in [0, 1] x [0, 1].
  float2 last = {0, 0};
  for (int i = 0; i < N_LINES; i++) {
    bLines_h[i].CP[0] = last;
    for (int j = 1; j < 3; j++) {
      bLines_h[i].CP[j].x = (float)rand() / (float)RAND_MAX;
      bLines_h[i].CP[j].y = (float)rand() / (float)RAND_MAX;
    }
    last = bLines_h[i].CP[2];
    // NULL tells the kernel that the vertex buffer still has to be allocated.
    bLines_h[i].vertexPos = NULL;
    bLines_h[i].nVertices = 0;
  }

  BezierLine *bLines_d;
  checkCudaErrors(cudaMalloc((void **)&bLines_d, N_LINES * sizeof(BezierLine)));
  checkCudaErrors(cudaMemcpy(bLines_d, bLines_h, N_LINES * sizeof(BezierLine),
                             cudaMemcpyHostToDevice));

  // Integer ceil-division: enough blocks to give every line one thread.
  const unsigned int nBlocks = (N_LINES + BLOCK_DIM - 1) / BLOCK_DIM;

  printf("Computing Bezier Lines (CUDA Dynamic Parallelism Version) ... ");
  computeBezierLinesCDP<<<nBlocks, BLOCK_DIM>>>(bLines_d, N_LINES);
  // Launch-configuration errors are only visible through cudaGetLastError.
  checkCudaErrors(cudaGetLastError());
  // Kernel launches are asynchronous: wait for the parent grid (and the child
  // grids it spawned) so "Done!" is truthful and execution errors surface.
  checkCudaErrors(cudaDeviceSynchronize());
  printf("Done!\n");

  // Do something to draw the lines here

  // The vertex buffers were allocated from device code, so they are released
  // from device code as well.
  freeVertexMem<<<nBlocks, BLOCK_DIM>>>(bLines_d, N_LINES);
  checkCudaErrors(cudaGetLastError());
  checkCudaErrors(cudaDeviceSynchronize());

  checkCudaErrors(cudaFree(bLines_d));
  delete[] bLines_h;

  exit(EXIT_SUCCESS);
}