/* * Copyright 1993-2015 NVIDIA Corporation. All rights reserved. * * Please refer to the NVIDIA end user license agreement (EULA) associated * with this source code for terms and conditions that govern your use of * this software. Any use, reproduction, disclosure, or distribution of * this software and related documentation outside the terms of the EULA * is strictly prohibited. * */ // OpenGL Graphics includes #define HELPERGL_EXTERN_GL_FUNC_IMPLEMENTATION #include "particleSystem.h" #include #include #include #include #include #include #include #include #include #include #include "particleSystem.cuh" #include "particles_kernel.cuh" #ifndef CUDART_PI_F #define CUDART_PI_F 3.141592654f #endif ParticleSystem::ParticleSystem(uint numParticles, uint3 gridSize, bool bUseOpenGL) : m_bInitialized(false) , m_bUseOpenGL(bUseOpenGL) , m_numParticles(numParticles) , m_hPos(0) , m_hVel(0) , m_dPos(0) , m_dVel(0) , m_gridSize(gridSize) , m_timer(NULL) , m_solverIterations(1) { m_numGridCells = m_gridSize.x * m_gridSize.y * m_gridSize.z; // float3 worldSize = make_float3(2.0f, 2.0f, 2.0f); m_gridSortBits = 18; // increase this for larger grids // set simulation parameters m_params.gridSize = m_gridSize; m_params.numCells = m_numGridCells; m_params.numBodies = m_numParticles; m_params.particleRadius = 1.0f / 64.0f; m_params.colliderPos = make_float3(-1.2f, -0.8f, 0.8f); m_params.colliderRadius = 0.2f; m_params.worldOrigin = make_float3(-1.0f, -1.0f, -1.0f); // m_params.cellSize = make_float3(worldSize.x / m_gridSize.x, worldSize.y / m_gridSize.y, worldSize.z / // m_gridSize.z); float cellSize = m_params.particleRadius * 2.0f; // cell size equal to particle diameter m_params.cellSize = make_float3(cellSize, cellSize, cellSize); m_params.spring = 0.5f; m_params.damping = 0.02f; m_params.shear = 0.1f; m_params.attraction = 0.0f; m_params.boundaryDamping = -0.5f; m_params.gravity = make_float3(0.0f, -0.0003f, 0.0f); m_params.globalDamping = 1.0f; _initialize(numParticles); } ParticleSystem::~ParticleSystem() { _finalize(); m_numParticles = 0; } uint ParticleSystem::createVBO(uint size) { GLuint vbo; glGenBuffers(1, &vbo); glBindBuffer(GL_ARRAY_BUFFER, vbo); glBufferData(GL_ARRAY_BUFFER, size, 0, GL_DYNAMIC_DRAW); glBindBuffer(GL_ARRAY_BUFFER, 0); return vbo; } inline float lerp(float a, float b, float t) { return a + t * (b - a); } // create a color ramp void colorRamp(float t, float *r) { const int ncolors = 7; float c[ncolors][3] = { { 1.0, 0.0, 0.0, }, { 1.0, 0.5, 0.0, }, { 1.0, 1.0, 0.0, }, { 0.0, 1.0, 0.0, }, { 0.0, 1.0, 1.0, }, { 0.0, 0.0, 1.0, }, { 1.0, 0.0, 1.0, }, }; t = t * (ncolors - 1); int i = (int)t; float u = t - floorf(t); r[0] = lerp(c[i][0], c[i + 1][0], u); r[1] = lerp(c[i][1], c[i + 1][1], u); r[2] = lerp(c[i][2], c[i + 1][2], u); } void ParticleSystem::_initialize(int numParticles) { assert(!m_bInitialized); m_numParticles = numParticles; // allocate host storage m_hPos = new float[m_numParticles * 4]; m_hVel = new float[m_numParticles * 4]; memset(m_hPos, 0, m_numParticles * 4 * sizeof(float)); memset(m_hVel, 0, m_numParticles * 4 * sizeof(float)); m_hCellStart = new uint[m_numGridCells]; memset(m_hCellStart, 0, m_numGridCells * sizeof(uint)); m_hCellEnd = new uint[m_numGridCells]; memset(m_hCellEnd, 0, m_numGridCells * sizeof(uint)); // allocate GPU data unsigned int memSize = sizeof(float) * 4 * m_numParticles; if (m_bUseOpenGL) { m_posVbo = createVBO(memSize); registerGLBufferObject(m_posVbo, &m_cuda_posvbo_resource); } else { checkCudaErrors(cudaMalloc((void **)&m_cudaPosVBO, memSize)); } allocateArray((void **)&m_dVel, memSize); allocateArray((void **)&m_dSortedPos, memSize); allocateArray((void **)&m_dSortedVel, memSize); allocateArray((void **)&m_dGridParticleHash, m_numParticles * sizeof(uint)); allocateArray((void **)&m_dGridParticleIndex, m_numParticles * sizeof(uint)); allocateArray((void **)&m_dCellStart, m_numGridCells * sizeof(uint)); allocateArray((void **)&m_dCellEnd, m_numGridCells * sizeof(uint)); if (m_bUseOpenGL) { m_colorVBO = createVBO(m_numParticles * 4 * sizeof(float)); registerGLBufferObject(m_colorVBO, &m_cuda_colorvbo_resource); // fill color buffer glBindBuffer(GL_ARRAY_BUFFER, m_colorVBO); float *data = (float *)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY); float *ptr = data; for (uint i = 0; i < m_numParticles; i++) { float t = i / (float)m_numParticles; #if 0 *ptr++ = rand() / (float) RAND_MAX; *ptr++ = rand() / (float) RAND_MAX; *ptr++ = rand() / (float) RAND_MAX; #else colorRamp(t, ptr); ptr += 3; #endif *ptr++ = 1.0f; } glUnmapBuffer(GL_ARRAY_BUFFER); } else { checkCudaErrors(cudaMalloc((void **)&m_cudaColorVBO, sizeof(float) * numParticles * 4)); } sdkCreateTimer(&m_timer); setParameters(&m_params); m_bInitialized = true; } void ParticleSystem::_finalize() { assert(m_bInitialized); delete[] m_hPos; delete[] m_hVel; delete[] m_hCellStart; delete[] m_hCellEnd; freeArray(m_dVel); freeArray(m_dSortedPos); freeArray(m_dSortedVel); freeArray(m_dGridParticleHash); freeArray(m_dGridParticleIndex); freeArray(m_dCellStart); freeArray(m_dCellEnd); if (m_bUseOpenGL) { unregisterGLBufferObject(m_cuda_colorvbo_resource); unregisterGLBufferObject(m_cuda_posvbo_resource); glDeleteBuffers(1, (const GLuint *)&m_posVbo); glDeleteBuffers(1, (const GLuint *)&m_colorVBO); } else { checkCudaErrors(cudaFree(m_cudaPosVBO)); checkCudaErrors(cudaFree(m_cudaColorVBO)); } } // step the simulation void ParticleSystem::update(float deltaTime) { assert(m_bInitialized); float *dPos; if (m_bUseOpenGL) { dPos = (float *)mapGLBufferObject(&m_cuda_posvbo_resource); } else { dPos = (float *)m_cudaPosVBO; } // update constants setParameters(&m_params); // integrate integrateSystem(dPos, m_dVel, deltaTime, m_numParticles); // calculate grid hash calcHash(m_dGridParticleHash, m_dGridParticleIndex, dPos, m_numParticles); // sort particles based on hash sortParticles(m_dGridParticleHash, m_dGridParticleIndex, m_numParticles); // reorder particle arrays into sorted order and // find start and end of each cell reorderDataAndFindCellStart(m_dCellStart, m_dCellEnd, m_dSortedPos, m_dSortedVel, m_dGridParticleHash, m_dGridParticleIndex, dPos, m_dVel, m_numParticles, m_numGridCells); // process collisions collide(m_dVel, m_dSortedPos, m_dSortedVel, m_dGridParticleIndex, m_dCellStart, m_dCellEnd, m_numParticles, m_numGridCells); // note: do unmap at end here to avoid unnecessary graphics/CUDA context switch if (m_bUseOpenGL) { unmapGLBufferObject(m_cuda_posvbo_resource); } } void ParticleSystem::dumpGrid() { // dump grid information copyArrayFromDevice(m_hCellStart, m_dCellStart, 0, sizeof(uint) * m_numGridCells); copyArrayFromDevice(m_hCellEnd, m_dCellEnd, 0, sizeof(uint) * m_numGridCells); uint maxCellSize = 0; for (uint i = 0; i < m_numGridCells; i++) { if (m_hCellStart[i] != 0xffffffff) { uint cellSize = m_hCellEnd[i] - m_hCellStart[i]; // printf("cell: %d, %d particles\n", i, cellSize); if (cellSize > maxCellSize) { maxCellSize = cellSize; } } } printf("maximum particles per cell = %d\n", maxCellSize); } void ParticleSystem::dumpParticles(uint start, uint count) { // debug copyArrayFromDevice(m_hPos, 0, &m_cuda_posvbo_resource, sizeof(float) * 4 * count); copyArrayFromDevice(m_hVel, m_dVel, 0, sizeof(float) * 4 * count); for (uint i = start; i < start + count; i++) { // printf("%d: ", i); printf("pos: (%.4f, %.4f, %.4f, %.4f)\n", m_hPos[i * 4 + 0], m_hPos[i * 4 + 1], m_hPos[i * 4 + 2], m_hPos[i * 4 + 3]); printf("vel: (%.4f, %.4f, %.4f, %.4f)\n", m_hVel[i * 4 + 0], m_hVel[i * 4 + 1], m_hVel[i * 4 + 2], m_hVel[i * 4 + 3]); } } float *ParticleSystem::getArray(ParticleArray array) { assert(m_bInitialized); float *hdata = 0; float *ddata = 0; struct cudaGraphicsResource *cuda_vbo_resource = 0; switch (array) { default: case POSITION: hdata = m_hPos; ddata = m_dPos; cuda_vbo_resource = m_cuda_posvbo_resource; break; case VELOCITY: hdata = m_hVel; ddata = m_dVel; break; } copyArrayFromDevice(hdata, ddata, &cuda_vbo_resource, m_numParticles * 4 * sizeof(float)); return hdata; } void ParticleSystem::setArray(ParticleArray array, const float *data, int start, int count) { assert(m_bInitialized); switch (array) { default: case POSITION: { if (m_bUseOpenGL) { unregisterGLBufferObject(m_cuda_posvbo_resource); glBindBuffer(GL_ARRAY_BUFFER, m_posVbo); glBufferSubData(GL_ARRAY_BUFFER, start * 4 * sizeof(float), count * 4 * sizeof(float), data); glBindBuffer(GL_ARRAY_BUFFER, 0); registerGLBufferObject(m_posVbo, &m_cuda_posvbo_resource); } else { copyArrayToDevice(m_cudaPosVBO, data, start * 4 * sizeof(float), count * 4 * sizeof(float)); } } break; case VELOCITY: copyArrayToDevice(m_dVel, data, start * 4 * sizeof(float), count * 4 * sizeof(float)); break; } } inline float frand() { return rand() / (float)RAND_MAX; } void ParticleSystem::initGrid(uint *size, float spacing, float jitter, uint numParticles) { srand(1973); for (uint z = 0; z < size[2]; z++) { for (uint y = 0; y < size[1]; y++) { for (uint x = 0; x < size[0]; x++) { uint i = (z * size[1] * size[0]) + (y * size[0]) + x; if (i < numParticles) { m_hPos[i * 4] = (spacing * x) + m_params.particleRadius - 1.0f + (frand() * 2.0f - 1.0f) * jitter; m_hPos[i * 4 + 1] = (spacing * y) + m_params.particleRadius - 1.0f + (frand() * 2.0f - 1.0f) * jitter; m_hPos[i * 4 + 2] = (spacing * z) + m_params.particleRadius - 1.0f + (frand() * 2.0f - 1.0f) * jitter; m_hPos[i * 4 + 3] = 1.0f; m_hVel[i * 4] = 0.0f; m_hVel[i * 4 + 1] = 0.0f; m_hVel[i * 4 + 2] = 0.0f; m_hVel[i * 4 + 3] = 0.0f; } } } } } void ParticleSystem::reset(ParticleConfig config) { switch (config) { default: case CONFIG_RANDOM: { int p = 0, v = 0; for (uint i = 0; i < m_numParticles; i++) { float point[3]; point[0] = frand(); point[1] = frand(); point[2] = frand(); m_hPos[p++] = 2 * (point[0] - 0.5f); m_hPos[p++] = 2 * (point[1] - 0.5f); m_hPos[p++] = 2 * (point[2] - 0.5f); m_hPos[p++] = 1.0f; // radius m_hVel[v++] = 0.0f; m_hVel[v++] = 0.0f; m_hVel[v++] = 0.0f; m_hVel[v++] = 0.0f; } } break; case CONFIG_GRID: { float jitter = m_params.particleRadius * 0.01f; uint s = (int)ceilf(powf((float)m_numParticles, 1.0f / 3.0f)); uint gridSize[3]; gridSize[0] = gridSize[1] = gridSize[2] = s; initGrid(gridSize, m_params.particleRadius * 2.0f, jitter, m_numParticles); } break; } setArray(POSITION, m_hPos, 0, m_numParticles); setArray(VELOCITY, m_hVel, 0, m_numParticles); } void ParticleSystem::addSphere(int start, float *pos, float *vel, int r, float spacing) { uint index = start; for (int z = -r; z <= r; z++) { for (int y = -r; y <= r; y++) { for (int x = -r; x <= r; x++) { float dx = x * spacing; float dy = y * spacing; float dz = z * spacing; float l = sqrtf(dx * dx + dy * dy + dz * dz); float jitter = m_params.particleRadius * 0.01f; if ((l <= m_params.particleRadius * 2.0f * r) && (index < m_numParticles)) { m_hPos[index * 4] = pos[0] + dx + (frand() * 2.0f - 1.0f) * jitter; m_hPos[index * 4 + 1] = pos[1] + dy + (frand() * 2.0f - 1.0f) * jitter; m_hPos[index * 4 + 2] = pos[2] + dz + (frand() * 2.0f - 1.0f) * jitter; m_hPos[index * 4 + 3] = pos[3]; m_hVel[index * 4] = vel[0]; m_hVel[index * 4 + 1] = vel[1]; m_hVel[index * 4 + 2] = vel[2]; m_hVel[index * 4 + 3] = vel[3]; index++; } } } } setArray(POSITION, m_hPos, start, index); setArray(VELOCITY, m_hVel, start, index); }