// cuda-samples/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/kernels.cuh
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*
* Various kernels and functors used throughout the algorithm. For details
* on usage see "SegmentationTreeBuilder::invokeStep()".
*/
#ifndef _KERNELS_H_
#define _KERNELS_H_
#include <stdio.h>
#include <thrust/functional.h>
#include "common.cuh"
// Functors used with thrust library.
/*
 * Unary predicate for use with Thrust algorithms: reports whether a value
 * is greater than or equal to the bound supplied at construction.
 *
 * argument_type / result_type are declared here directly instead of being
 * inherited from thrust::unary_function, which newer Thrust/CCCL releases
 * deprecate and remove. The nested typedefs and the call operator are the
 * same as before, so existing uses of the functor keep working.
 */
template <typename Input>
struct IsGreaterEqualThan
{
    typedef Input argument_type;
    typedef bool result_type;

    // Stores the bound that operator() compares against.
    __host__ __device__ IsGreaterEqualThan(uint upperBound) :
        upperBound_(upperBound) {}

    // Returns true when value >= upperBound_, false otherwise.
    __host__ __device__ bool operator()(const Input &value) const
    {
        return value >= upperBound_;
    }

    uint upperBound_;
};
// CUDA kernels.
// Adds "scalar" to each of the first "size" elements of "array".
// Every thread handles the single element selected by its global
// x-dimension index.
__global__ void addScalar(uint *array, int scalar, uint size)
{
    const uint idx = threadIdx.x + blockDim.x * blockIdx.x;

    // Threads that fall past the end of the array have nothing to do.
    if (idx >= size)
    {
        return;
    }

    array[idx] = array[idx] + scalar;
}
// For each of the first "verticesCount" entries of "verticesOffsets",
// writes 1 into "flags" at the position that entry holds. Other elements
// of "flags" are left untouched.
__global__ void markSegments(const uint *verticesOffsets,
                             uint *flags,
                             uint verticesCount)
{
    const uint idx = threadIdx.x + blockDim.x * blockIdx.x;

    if (idx >= verticesCount)
    {
        return;
    }

    const uint offset = verticesOffsets[idx];
    flags[offset] = 1;
}
// Scatters new vertex IDs into a lookup table: for every i below
// "verticesCount", stores newVerticesIDs[i] at
// verticesMapping[clusteredVerticesIDs[i]].
__global__ void getVerticesMapping(const uint *clusteredVerticesIDs,
                                   const uint *newVerticesIDs,
                                   uint *verticesMapping,
                                   uint verticesCount)
{
    const uint idx = threadIdx.x + blockDim.x * blockIdx.x;

    if (idx >= verticesCount)
    {
        return;
    }

    const uint target = clusteredVerticesIDs[idx];
    const uint mappedID = newVerticesIDs[idx];
    verticesMapping[target] = mappedID;
}
// For each vertex below "verticesCount", copies one element of
// "minScannedEdges" into "successors". The element read is the one just
// before the next vertex's offset, or the last of the "edgesCount"
// elements for the final vertex.
__global__ void getSuccessors(const uint *verticesOffsets,
                              const uint *minScannedEdges,
                              uint *successors,
                              uint verticesCount,
                              uint edgesCount)
{
    const uint idx = threadIdx.x + blockDim.x * blockIdx.x;

    if (idx >= verticesCount)
    {
        return;
    }

    // The final vertex has no following offset to read, so its position
    // is derived from the total edge count instead.
    const bool isLastVertex = (idx == verticesCount - 1);

    uint pos;

    if (isLastVertex)
    {
        pos = edgesCount - 1;
    }
    else
    {
        pos = verticesOffsets[idx + 1] - 1;
    }

    successors[idx] = minScannedEdges[pos];
}
// Breaks two-element cycles in "successors". When the successor of a
// vertex's successor is the vertex itself, the smaller of the two indices
// is made its own successor.
__global__ void removeCycles(uint *successors,
                             uint verticesCount)
{
    const uint idx = threadIdx.x + blockDim.x * blockIdx.x;

    if (idx >= verticesCount)
    {
        return;
    }

    const uint next = successors[idx];
    const uint nextOfNext = successors[next];

    // Only vertices that are reached again after two hops take part.
    if (idx != nextOfNext)
    {
        return;
    }

    // Whichever index of the pair is smaller becomes a self-successor.
    // Both threads of a pair may reach this store; they target the same
    // slot with the same value.
    const uint keep = (idx < next) ? idx : next;
    successors[keep] = keep;
}
// For each vertex below "verticesCount", follows the "successors" chain
// until it reaches an element that is its own successor, and stores that
// element in "representatives".
__global__ void getRepresentatives(const uint *successors,
                                   uint *representatives,
                                   uint verticesCount)
{
    const uint idx = threadIdx.x + blockDim.x * blockIdx.x;

    if (idx >= verticesCount)
    {
        return;
    }

    uint current = successors[idx];

    // Advance one hop at a time; the walk ends at the first element whose
    // successor is itself.
    for (uint next = successors[current];
         next != current;
         next = successors[current])
    {
        current = next;
    }

    representatives[idx] = current;
}
// Overwrites with UINT_MAX every entry of "edges" whose startpoint and
// endpoint are sent to the same value by "verticesMapping". Entries whose
// mapped endpoints differ are left unchanged.
__global__ void invalidateLoops(const uint *startpoints,
                                const uint *verticesMapping,
                                uint *edges,
                                uint edgesCount)
{
    const uint idx = threadIdx.x + blockDim.x * blockIdx.x;

    if (idx >= edgesCount)
    {
        return;
    }

    const uint from = startpoints[idx];
    const uint to = edges[idx];

    const uint mappedFrom = verticesMapping[from];
    const uint mappedTo = verticesMapping[to];

    // Both ends map to the same value: mark the edge as invalid.
    if (mappedFrom == mappedTo)
    {
        edges[idx] = UINT_MAX;
    }
}
// Fills "newStartpoints" and "survivedEdgesIDs" for each of the
// "edgesCount" edges:
//  - edge value below UINT_MAX: the new startpoint is the mapped
//    startpoint and the survived ID is the edge's own index;
//  - edge value equal to UINT_MAX: the new startpoint is the mapped
//    startpoint offset by "newVerticesCount" and the survived ID is
//    UINT_MAX.
// The "weights" parameter is not read by this kernel.
__global__ void calculateEdgesInfo(const uint *startpoints,
                                   const uint *verticesMapping,
                                   const uint *edges,
                                   const float *weights,
                                   uint *newStartpoints,
                                   uint *survivedEdgesIDs,
                                   uint edgesCount,
                                   uint newVerticesCount)
{
    const uint idx = threadIdx.x + blockDim.x * blockIdx.x;

    if (idx >= edgesCount)
    {
        return;
    }

    const uint from = startpoints[idx];
    const uint to = edges[idx];
    const uint mappedFrom = verticesMapping[from];

    if (to < UINT_MAX)
    {
        newStartpoints[idx] = mappedFrom;
        survivedEdgesIDs[idx] = idx;
    }
    else
    {
        newStartpoints[idx] = newVerticesCount + mappedFrom;
        survivedEdgesIDs[idx] = UINT_MAX;
    }
}
// Builds the new edge and weight arrays. For each i below "edgesCount",
// looks up the edge ID stored in survivedEdgesIDs[i], writes the mapped
// value of that edge into newEdges[i], and copies that edge's weight
// into newWeights[i].
__global__ void makeNewEdges(const uint *survivedEdgesIDs,
                             const uint *verticesMapping,
                             const uint *edges,
                             const float *weights,
                             uint *newEdges,
                             float *newWeights,
                             uint edgesCount)
{
    const uint idx = threadIdx.x + blockDim.x * blockIdx.x;

    if (idx >= edgesCount)
    {
        return;
    }

    const uint sourceEdge = survivedEdgesIDs[idx];
    const uint oldEndpoint = edges[sourceEdge];

    newEdges[idx] = verticesMapping[oldEndpoint];
    newWeights[idx] = weights[sourceEdge];
}
#endif // #ifndef _KERNELS_H_