mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2025-01-19 22:15:55 +08:00
289 lines
8.8 KiB
Plaintext
289 lines
8.8 KiB
Plaintext
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* * Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* * Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
|
* contributors may be used to endorse or promote products derived
|
|
* from this software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
|
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
|
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#include <helper_cuda.h>
|
|
#include <helper_math.h>
|
|
#include <VFlockingD3D10.h>
|
|
|
|
#define PI 3.1415926536f
|
|
|
|
typedef unsigned int uint;
|
|
|
|
/*
 * Tests whether pos0 lies strictly inside the axis-aligned rectangle of the
 * given width and height that is centred on pos1.
 *
 * pos0, pos1  points to compare
 * width       full extent of the rectangle along x
 * height      full extent of the rectangle along y
 *
 * Returns true only when both the x and the y distance between the two points
 * are smaller than half of the corresponding extent.
 */
__device__ bool isInsideQuad_D(float2 pos0, float2 pos1, float width,
                               float height) {
  const bool withinX = fabs(pos0.x - pos1.x) < 0.5f * width;
  const bool withinY = fabs(pos0.y - pos1.y) < 0.5f * height;

  return withinX && withinY;
}
|
|
|
|
/*
 * Tests whether a pixel is covered by a bird shape made of a rectangle and a
 * disc that share the same centre.
 *
 * pixel   point being tested
 * pos     centre of the shape
 * width   full extent of the rectangle along x
 * height  full extent of the rectangle along y
 * radius  radius of the disc
 *
 * Returns true when the pixel is strictly inside the rectangle OR strictly
 * inside the disc.
 */
__device__ bool isInsideBird(float2 pixel, float2 pos, float width,
                             float height, float radius) {
  const float dx = pixel.x - pos.x;
  const float dy = pixel.y - pos.y;

  // fabsf keeps the comparison in single precision and cannot be resolved to
  // the integer abs() overload, matching the fabs-based test used by
  // isInsideQuad_D.
  const bool insideRect = (fabsf(dx) < 0.5f * width) &&
                          (fabsf(dy) < 0.5f * height);

  // Compare squared distances so no square root is needed.
  const bool insideDisc = (dx * dx + dy * dy) < (radius * radius);

  return insideRect || insideDisc;
}
|
|
|
|
/*
 * Computes the next position of each bird; one thread handles one bird.
 *
 * newPos      output positions, written at index i by thread i
 * curPos      current positions (read only here)
 * numBirds    number of birds; threads with i >= numBirds exit immediately
 * hasproxy    per-bird flag selecting which of the two update rules is used
 * neighbors, rightgoals, leftgoals
 *             numBirds x numBirds flag matrices, read at [i * numBirds + j]
 * params      supplies dX, dY, wingspan, lambda and epsilon
 *
 * Birds without a proxy step a distance of dX towards the first bird j that
 * is flagged in their leftgoals row (or do not move if none is flagged).
 * Birds with a proxy pick the signed x offset of smallest magnitude among
 * their left/right goal slots and step +/-dX towards it; additionally they
 * step back by dY when a flagged neighbour sits within epsilon in front of
 * them along y.
 */
__global__ void cuda_kernel_update(float2 *newPos, float2 *curPos,
                                   uint numBirds, bool *hasproxy,
                                   bool *neighbors, bool *rightgoals,
                                   bool *leftgoals, Params *params) {
  const uint self = blockIdx.x * blockDim.x + threadIdx.x;

  if (self >= numBirds) {
    return;
  }

  // Start of this bird's row in the numBirds x numBirds flag matrices.
  const uint row = self * numBirds;
  float2 step = make_float2(0.f);

  if (hasproxy[self]) {
    // Signed x offset to the closest goal slot seen so far; the initial value
    // acts as "no goal found yet".
    float nearest = 50000.f;
    bool dodged = false;

    for (uint other = 0; other < numBirds; other++) {
      const uint cell = row + other;
      float offset;

      if (leftgoals[cell]) {
        offset = curPos[other].x - (params->wingspan + params->lambda) -
                 curPos[self].x;

        if (fabs(offset) < fabs(nearest)) {
          nearest = offset;
        }
      }

      if (rightgoals[cell]) {
        offset = curPos[other].x + (params->wingspan + params->lambda) -
                 curPos[self].x;

        if (fabs(offset) < fabs(nearest)) {
          nearest = offset;
        }
      }

      // Only the first neighbour found within epsilon ahead triggers the
      // backwards step.
      if (!dodged && neighbors[cell]) {
        if (curPos[other].y >= curPos[self].y &&
            curPos[other].y < curPos[self].y + params->epsilon) {
          step.y = -params->dY;
          dodged = true;
        }
      }
    }

    // Already within one step of the goal slot: leave without writing.
    // NOTE(review): newPos[self] is not written on this path, so it keeps
    // whatever the buffer held before -- confirm the caller relies on that.
    if (fabs(nearest) <= params->dX) {
      return;
    }

    if (nearest > 0) {
      step.x = params->dX;
    } else {
      step.x = -params->dX;
    }
  } else {
    for (uint other = 0; other < numBirds; other++) {
      if (other != self && leftgoals[row + other]) {
        step = params->dX * normalize(curPos[other] - curPos[self]);
        break;
      }
    }
  }

  newPos[self].x = curPos[self].x + step.x;
  newPos[self].y = curPos[self].y + step.y;
}
|
|
|
|
/*
 * Prunes goal flags using triples of birds; one thread handles one triple.
 *
 * pos         bird positions
 * numBirds    number of birds; numBirds*(numBirds-1)*(numBirds-2)/6 triples
 *             are processed, threads beyond that exit immediately
 * hasproxy    per-bird flag selecting the pruning rule
 * neighbors   not referenced by this kernel
 * rightgoals, leftgoals
 *             numBirds x numBirds flag matrices; entries in the row of the
 *             rearmost bird of the triple may be cleared
 * triples     bird indices of each triple
 * params      supplies wingspan and lambda
 *
 * The three birds are ordered by ascending y. For the bird with the smallest
 * y ("rear"):
 *  - if it has a proxy and the other two are closer than
 *    2 * (wingspan + lambda) along x, the left goal at the right-hand bird and
 *    the right goal at the left-hand bird are cleared;
 *  - if it has no proxy and both other birds are flagged as left goals, only
 *    the nearer one stays flagged.
 */
__global__ void cuda_kernel_checktriples(float2 *pos, uint numBirds,
                                         bool *hasproxy, bool *neighbors,
                                         bool *rightgoals, bool *leftgoals,
                                         uint3 *triples, Params *params) {
  const uint tid = blockIdx.x * blockDim.x + threadIdx.x;
  const uint numTriples = numBirds * (numBirds - 1) * (numBirds - 2) / 6;

  if (tid >= numTriples) {
    return;
  }

  uint rear = triples[tid].x;
  uint mid = triples[tid].y;
  uint lead = triples[tid].z;
  uint tmp;

  // Three compare-exchanges order the birds by ascending y
  // (rear <= mid <= lead); elements with equal y are never swapped.
  if (pos[mid].y > pos[lead].y) {
    tmp = mid;
    mid = lead;
    lead = tmp;
  }

  if (pos[rear].y > pos[mid].y) {
    tmp = rear;
    rear = mid;
    mid = tmp;
  }

  if (pos[mid].y > pos[lead].y) {
    tmp = mid;
    mid = lead;
    lead = tmp;
  }

  // Start of the rear bird's row in the goal matrices.
  const uint row = rear * numBirds;

  if (hasproxy[rear]) {
    const float dx = pos[lead].x - pos[mid].x;

    if (fabs(dx) < 2.f * (params->wingspan + params->lambda)) {
      // Of the two birds ahead, pick which one sits on the right (larger x).
      uint rightBird, leftBird;

      if (dx >= 0) {
        rightBird = lead;
        leftBird = mid;
      } else {
        rightBird = mid;
        leftBird = lead;
      }

      // Drop the two goal slots that fall between the pair.
      if (leftgoals[row + rightBird]) {
        leftgoals[row + rightBird] = false;
      }

      if (rightgoals[row + leftBird]) {
        rightgoals[row + leftBird] = false;
      }
    }
  } else {
    if ((leftgoals[row + lead]) && (leftgoals[row + mid])) {
      // Keep only the nearer bird as a goal; on a tie `mid` is the one
      // cleared.
      const bool midIsNearer =
          length(pos[mid] - pos[rear]) < length(pos[lead] - pos[rear]);

      if (midIsNearer) {
        leftgoals[row + lead] = false;
      } else {
        leftgoals[row + mid] = false;
      }
    }
  }
}
|
|
|
|
/*
 * Initialises goal and neighbour flags from pairs of birds; one thread
 * handles one pair.
 *
 * pos         bird positions
 * numBirds    number of birds; numBirds*(numBirds-1)/2 pairs are processed,
 *             threads beyond that exit immediately
 * hasproxy    per-bird flag, set for the rear bird when it is a neighbour
 * neighbors, rightgoals, leftgoals
 *             numBirds x numBirds flag matrices, written at
 *             [back * numBirds + front]
 * pairs       bird indices of each pair
 * params      supplies wingspan, lambda, upwashX and upwashY
 *
 * Within a pair, the bird with the larger y is "front" (on a tie, pairs[i].x
 * is front). The front bird is always marked as both a left and a right goal
 * of the back bird. If the back bird lies inside a rectangle of width
 * 2 * (wingspan + lambda + upwashX) and height upwashY centred half of
 * upwashY behind the front bird, the pair is also flagged as neighbours and
 * the back bird is flagged as having a proxy.
 */
__global__ void cuda_kernel_checkpairs(float2 *pos, uint numBirds,
                                       bool *hasproxy, bool *neighbors,
                                       bool *rightgoals, bool *leftgoals,
                                       uint2 *pairs, Params *params) {
  const uint pairIdx = blockIdx.x * blockDim.x + threadIdx.x;
  const uint numPairs = numBirds * (numBirds - 1) / 2;

  if (pairIdx >= numPairs) {
    return;
  }

  const bool secondIsAhead = pos[pairs[pairIdx].y].y > pos[pairs[pairIdx].x].y;
  const uint front = secondIsAhead ? pairs[pairIdx].y : pairs[pairIdx].x;
  const uint back = secondIsAhead ? pairs[pairIdx].x : pairs[pairIdx].y;

  // Matrix cell describing "front as seen from back".
  const uint cell = back * numBirds + front;

  leftgoals[cell] = true;
  rightgoals[cell] = true;

  // Centre of the test rectangle: half of upwashY behind the front bird.
  float2 zoneCentre;
  zoneCentre.x = pos[front].x;
  zoneCentre.y = pos[front].y - 0.5f * params->upwashY;

  const float zoneWidth =
      2.f * (params->wingspan + params->lambda + params->upwashX);

  if (!isInsideQuad_D(pos[back], zoneCentre, zoneWidth, params->upwashY)) {
    return;
  }

  neighbors[cell] = true;

  if (!hasproxy[back]) {
    hasproxy[back] = true;
  }
}
|
|
|
|
/*
 * Runs one simulation step on the GPU and accumulates timing statistics.
 *
 * newPos        device buffer receiving the updated positions
 * curPos        device buffer holding the current positions
 * numBirds      number of birds
 * d_hasproxy    device array of numBirds flags, cleared and rebuilt each step
 * d_neighbors, d_leftgoals, d_rightgoals
 *               device numBirds x numBirds flag matrices, cleared and rebuilt
 *               each step
 * d_pairs       device list of bird pairs consumed by cuda_kernel_checkpairs
 * d_triples     device list of bird triples consumed by
 *               cuda_kernel_checktriples
 * d_params      device copy of the simulation parameters
 *
 * The three kernels are launched back to back on the default stream
 * (pairs -> triples -> update). Every 100 steps the accumulated elapsed time
 * divided by 100 is printed. Errors are reported on stdout; the function
 * never aborts.
 */
extern "C" void cuda_simulate(float2 *newPos, float2 *curPos, uint numBirds,
                              bool *d_hasproxy, bool *d_neighbors,
                              bool *d_leftgoals, bool *d_rightgoals,
                              uint2 *d_pairs, uint3 *d_triples,
                              Params *d_params) {
  static float ms = 0.f;
  static uint step = 0;
  const int smallblockSize = 32, midblockSize = 128, bigblockSize = 32;
  float tempms = 0.f;

  cudaEvent_t e_start, e_stop;
  cudaEventCreate(&e_start);
  cudaEventCreate(&e_stop);
  cudaEventRecord(e_start, 0);

  // Widen before multiplying so the byte count cannot wrap in 32-bit
  // arithmetic.
  const size_t vectorBytes = (size_t)numBirds * sizeof(bool);
  const size_t matrixBytes = (size_t)numBirds * numBirds * sizeof(bool);

  cudaMemset(d_leftgoals, 0, matrixBytes);
  cudaMemset(d_rightgoals, 0, matrixBytes);
  cudaMemset(d_hasproxy, 0, vectorBytes);
  cudaMemset(d_neighbors, 0, matrixBytes);

  // Work-item counts use the same unsigned expressions as the kernels' own
  // bounds checks so host and device agree on the range.
  const uint numPairs = numBirds * (numBirds - 1) / 2;
  const uint numTriples = numBirds * (numBirds - 1) * (numBirds - 2) / 6;

  dim3 Db, Dg;

  // A grid dimension of zero is an invalid launch configuration, so each
  // kernel is skipped when it has no work.
  if (numPairs > 0) {
    Db = dim3(bigblockSize);
    Dg = dim3((numPairs + bigblockSize - 1) / bigblockSize);
    cuda_kernel_checkpairs<<<Dg, Db>>>(curPos, numBirds, d_hasproxy,
                                       d_neighbors, d_rightgoals, d_leftgoals,
                                       d_pairs, d_params);
  }

  if (numTriples > 0) {
    // The grid must be sized with the same block size that is launched.
    Db = dim3(midblockSize);
    Dg = dim3((numTriples + midblockSize - 1) / midblockSize);
    cuda_kernel_checktriples<<<Dg, Db>>>(curPos, numBirds, d_hasproxy,
                                         d_neighbors, d_rightgoals,
                                         d_leftgoals, d_triples, d_params);
  }

  if (numBirds > 0) {
    Db = dim3(smallblockSize);
    Dg = dim3((numBirds + smallblockSize - 1) / smallblockSize);
    cuda_kernel_update<<<Dg, Db>>>(newPos, curPos, numBirds, d_hasproxy,
                                   d_neighbors, d_rightgoals, d_leftgoals,
                                   d_params /*, d_pWingTips */);
  }

  // Keep the synchronisation result: it carries errors raised while the
  // kernels were executing.
  const cudaError_t syncError = cudaDeviceSynchronize();

  cudaEventRecord(e_stop, 0);
  cudaEventSynchronize(e_stop);

  if (cudaEventElapsedTime(&tempms, e_start, e_stop) == cudaSuccess) {
    ms += tempms;
  }

  // Events are created on every call, so they must be released on every
  // call.
  cudaEventDestroy(e_start);
  cudaEventDestroy(e_stop);

  if (!(step % 100) && step) {
    printf("GPU, step %u \ntime per step %6.3f ms \n", step, ms / 100.f);
    ms = 0.f;
  }

  step++;

  cudaError_t error = cudaGetLastError();

  if (error == cudaSuccess) {
    error = syncError;
  }

  if (error != cudaSuccess) {
    printf("one of the cuda kernels failed to launch, error = %d (%s)\n",
           (int)error, cudaGetErrorString(error));
  }
}
|