/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * This application demonstrates an approach to the construction of
 * image segmentation trees. It is based on Boruvka's MST algorithm.
 * Here's the complete list of references:
 * 1) V. Vineet et al, "Fast Minimum Spanning Tree for
 *    Large Graphs on the GPU";
 * 2) P. Felzenszwalb et al, "Efficient Graph-Based Image Segmentation";
 * 3) A. Ion et al, "Considerations Regarding the Minimum Spanning
 *    Tree Pyramid Segmentation Method".
 */

// System includes.
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cmath>

// STL includes.
#include <iostream>
#include <vector>
#include <list>
#include <deque>
#include <algorithm>
#include <utility>
#include <limits>

// Thrust library includes.
#include <thrust/device_ptr.h>
#include <thrust/device_malloc.h>
#include <thrust/device_free.h>
#include <thrust/fill.h>
#include <thrust/copy.h>
#include <thrust/sequence.h>
#include <thrust/scan.h>
#include <thrust/sort.h>
#include <thrust/unique.h>
#include <thrust/adjacent_difference.h>
#include <thrust/find.h>
#include <thrust/functional.h>
#include <thrust/tuple.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/discard_iterator.h>
#include <thrust/system_error.h>

// Sample framework includes.
#include <helper_functions.h>
#include <helper_cuda.h>

// Project includes.
#include "common.cuh"

// Kernels.
#include "kernels.cuh"

using std::cin;
using std::cout;
using std::endl;
using std::vector;
using std::list;
using std::deque;

// Very simple von Neumann middle-square PRNG. rand() differs across
// OS platforms, which would make testing and the output inconsistent.
int myrand(void)
{
    static int seed = 72191;
    char sq[22];

    seed *= seed;
    sprintf(sq, "%010d", seed);

    // Pull the middle 5 digits out of sq.
    sq[8] = 0;
    seed = atoi(&sq[3]);

    return seed;
}

// Simple memory pool class. It is nothing more than an array of
// fixed-sized arrays.
template <typename T>
class DeviceMemoryPool
{
public:
    // The parameters of the constructor are as follows:
    // 1) uint chunkSize   --- size of a particular array;
    // 2) uint chunksCount --- number of fixed-sized arrays.
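    //
    // A minimal usage sketch (illustrative only, not part of the sample's
    // control flow; the pool sizes here are made up):
    //
    //   DeviceMemoryPool<uint> pool(1024, 4);          // four 1024-element chunks
    //   thrust::device_ptr<uint> scratch = pool.get(); // borrow a chunk
    //   thrust::fill(scratch, scratch + 1024, 0);      // ... use it ...
    //   pool.put(scratch);                             // return it to the pool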
    DeviceMemoryPool(uint chunkSize, uint chunksCount) :
        chunkSize_(chunkSize)
    {
        chunkRawSize_ = (chunkSize * sizeof(T) + 511) & ~511;

        try
        {
            basePtr_ = thrust::device_malloc(chunkRawSize_ * chunksCount);
        }
        catch (thrust::system_error &e)
        {
            cout << "Pool memory allocation failed (" << e.what()
                 << ")" << endl;
            exit(EXIT_FAILURE);
        }

        for (uint chunkIndex = 0; chunkIndex < chunksCount; ++chunkIndex)
        {
            chunks_.push_back(
                thrust::device_ptr<T>(
                    reinterpret_cast<T *>(
                        static_cast<char *>(basePtr_.get()) +
                        chunkRawSize_ * chunkIndex)));
        }
    }

    ~DeviceMemoryPool()
    {
        try
        {
            thrust::device_free(basePtr_);
        }
        catch (thrust::system_error &e)
        {
            cout << "Pool memory deallocation failed (" << e.what()
                 << ")" << endl;
            exit(EXIT_FAILURE);
        }
    }

    // Returns the address of the first available array in the memory pool.
    thrust::device_ptr<T> get()
    {
        thrust::device_ptr<T> ptr(chunks_.back());
        chunks_.pop_back();

        return ptr;
    }

    // Pushes the address stored in "ptr" to the list of available arrays
    // of the memory pool. Note that it is the user who is responsible for
    // returning previously requested memory to the appropriate pool.
    inline void put(const thrust::device_ptr<T> &ptr)
    {
        chunks_.push_back(ptr);
    }

    uint totalFreeChunks() const
    {
        return static_cast<uint>(chunks_.size());
    }

private:
    uint chunkSize_, chunkRawSize_;

    thrust::device_ptr<void> basePtr_;
    list< thrust::device_ptr<T> > chunks_;
};

// Graph structure.
struct Graph
{
    Graph() {}
    Graph(uint verticesCount, uint edgesCount) :
        vertices(verticesCount),
        edges(edgesCount),
        weights(edgesCount) {}

    // This vector stores offsets for each vertex in the "edges" and
    // "weights" vectors. For example:
    // "vertices[0]" is the index of the first outgoing edge of vertex #0,
    // "vertices[1]" is the index of the first outgoing edge of vertex #1,
    // etc.
    vector<uint> vertices;

    // This vector stores the indices of the endpoints of the corresponding
    // edges. For example, "edges[vertices[0]]" is the first neighbouring
    // vertex of vertex #0.
    vector<uint> edges;

    // This vector stores the weights of the corresponding edges.
    vector<float> weights;
};

// Simple segmentation tree class.
// Each level of the tree corresponds to a segmentation.
// See the "Level" class for details.
class Pyramid
{
public:
    void addLevel(uint totalSuperNodes,
                  uint totalNodes,
                  thrust::device_ptr<uint> superVerticesOffsets,
                  thrust::device_ptr<uint> verticesIDs)
    {
        levels_.push_back(Level(totalSuperNodes, totalNodes));
        levels_.back().buildFromDeviceData(superVerticesOffsets, verticesIDs);
    }

    uint levelsCount() const
    {
        return static_cast<uint>(levels_.size());
    }

    void dump(uint width, uint height) const
    {
        char filename[256], format[256];

        uint levelIndex = 0;
        uint requiredDigitsCount =
            static_cast<uint>(log10(static_cast<float>(levelsCount()))) + 1;

        sprintf(format, "level_%%0%uu.ppm", requiredDigitsCount);

        for (LevelsIterator level = levels_.rbegin();
             level != levels_.rend();
             ++level, ++levelIndex)
        {
            sprintf(filename, format, levelIndex);
            dumpLevel(level, width, height, filename);
        }
    }

private:
    // Level of the segmentation tree.
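    //
    // A small worked example of the encoding used by "Level" (hypothetical
    // data): with
    //   superNodesOffsets_ = {0, 3}
    //   nodes_             = {4, 0, 2, 1, 3}
    // super-node #0 groups nodes {4, 0, 2} of the previous level and
    // super-node #1 groups nodes {1, 3}.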
    class Level
    {
    public:
        Level(uint totalSuperNodes, uint totalNodes) :
            superNodesOffsets_(totalSuperNodes),
            nodes_(totalNodes) {}

        void buildFromDeviceData(
            thrust::device_ptr<uint> superVerticesOffsets,
            thrust::device_ptr<uint> verticesIDs)
        {
            checkCudaErrors(
                cudaMemcpy(&(superNodesOffsets_[0]),
                           superVerticesOffsets.get(),
                           sizeof(uint) * superNodesOffsets_.size(),
                           cudaMemcpyDeviceToHost));
            checkCudaErrors(
                cudaMemcpy(&(nodes_[0]),
                           verticesIDs.get(),
                           sizeof(uint) * nodes_.size(),
                           cudaMemcpyDeviceToHost));
        }

    private:
        friend class Pyramid;

        // This pair of vectors describes the relation between two
        // consecutive levels. Consider an example: let the index of the
        // current level be n. Then nodes of level #(n-1) with the indices
        // stored in
        // "nodes_[superNodesOffsets_[0]]",
        // "nodes_[superNodesOffsets_[0] + 1]",
        // ...,
        // "nodes_[superNodesOffsets_[1] - 1]"
        // correspond to vertex #0 of level #n. And so on.
        vector<uint> superNodesOffsets_;
        vector<uint> nodes_;
    };

    typedef list<Level>::const_reverse_iterator LevelsIterator;

    // Dumps a level to the file "level_n.ppm" where n is the index of the
    // level. Segments are drawn in random colors.
    void dumpLevel(LevelsIterator level,
                   uint width,
                   uint height,
                   const char *filename) const
    {
        deque< std::pair<uint, uint> > nodesQueue;

        uint totalSegments;

        {
            const vector<uint> &superNodesOffsets = level->superNodesOffsets_;
            const vector<uint> &nodes = level->nodes_;

            totalSegments = static_cast<uint>(superNodesOffsets.size());

            for (uint superNodeIndex = 0, nodeIndex = 0;
                 superNodeIndex < superNodesOffsets.size();
                 ++superNodeIndex)
            {
                uint superNodeEnd =
                    superNodeIndex + 1 < superNodesOffsets.size() ?
                        superNodesOffsets[superNodeIndex + 1] :
                        static_cast<uint>(nodes.size());

                for (; nodeIndex < superNodeEnd; ++nodeIndex)
                {
                    nodesQueue.push_back(
                        std::make_pair(nodes[nodeIndex], superNodeIndex));
                }
            }
        }

        ++level;

        while (level != levels_.rend())
        {
            uint superNodesCount = static_cast<uint>(nodesQueue.size());

            const vector<uint> &superNodesOffsets = level->superNodesOffsets_;
            const vector<uint> &nodes = level->nodes_;

            while (superNodesCount--)
            {
                std::pair<uint, uint> currentNode = nodesQueue.front();
                nodesQueue.pop_front();

                uint superNodeBegin = superNodesOffsets[currentNode.first];
                uint superNodeEnd =
                    currentNode.first + 1 < superNodesOffsets.size() ?
                        superNodesOffsets[currentNode.first + 1] :
                        static_cast<uint>(nodes.size());

                for (uint nodeIndex = superNodeBegin;
                     nodeIndex < superNodeEnd;
                     ++nodeIndex)
                {
                    nodesQueue.push_back(
                        std::make_pair(nodes[nodeIndex], currentNode.second));
                }
            }

            ++level;
        }

        vector<uchar> colors(3 * totalSegments);

        for (uint colorIndex = 0; colorIndex < totalSegments; ++colorIndex)
        {
            colors[colorIndex * 3    ] = myrand() % 256;
            colors[colorIndex * 3 + 1] = myrand() % 256;
            colors[colorIndex * 3 + 2] = myrand() % 256;
        }

        uchar *image = new uchar[width * height * 3];

        while (!nodesQueue.empty())
        {
            std::pair<uint, uint> currentNode = nodesQueue.front();
            nodesQueue.pop_front();

            uint pixelIndex = currentNode.first;
            uint pixelSegment = currentNode.second;

            image[pixelIndex * 3    ] = colors[pixelSegment * 3    ];
            image[pixelIndex * 3 + 1] = colors[pixelSegment * 3 + 1];
            image[pixelIndex * 3 + 2] = colors[pixelSegment * 3 + 2];
        }

        __savePPM(filename, image, width, height, 3);

        delete[] image;
    }

    list<Level> levels_;
};

// The class that encapsulates the main algorithm.
class SegmentationTreeBuilder
{
public:
    SegmentationTreeBuilder() : verticesCount_(0), edgesCount_(0) {}
    ~SegmentationTreeBuilder() {}

    // Repeatedly invokes the step of the algorithm until the limiting
    // segmentation is found. Returns the time (in ms) spent on building
    // the tree.
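    //
    // Typical call pattern (mirrors main() at the bottom of this file):
    //
    //   SegmentationTreeBuilder builder;
    //   Pyramid segmentations;
    //   float elapsedMs = builder.run(graph, segmentations);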
    float run(const Graph &graph, Pyramid &segmentations)
    {
        cudaEvent_t start, stop;

        cudaEventCreate(&start);
        cudaEventCreate(&stop);

        cudaEventRecord(start, 0);

        // Allocate the required memory pools. We need just 4 types of
        // arrays.
        MemoryPoolsCollection pools =
        {
            DeviceMemoryPool<uint>(
                static_cast<uint>(graph.vertices.size()),
                kUintVerticesPoolsRequired),
            DeviceMemoryPool<float>(
                static_cast<uint>(graph.vertices.size()),
                kFloatVerticesPoolsRequired),
            DeviceMemoryPool<uint>(
                static_cast<uint>(graph.edges.size()),
                kUintEdgesPoolsRequired),
            DeviceMemoryPool<float>(
                static_cast<uint>(graph.edges.size()),
                kFloatEdgesPoolsRequired)
        };

        // Initialize the internal variables.
        try
        {
            initializeData(graph, pools);
        }
        catch (thrust::system_error &e)
        {
            cout << "Initialization failed (" << e.what() << ")" << endl;
            exit(EXIT_FAILURE);
        }

        // Run the steps.
        AlgorithmStatus status;

        try
        {
            do
            {
                status = invokeStep(pools, segmentations);
            }
            while (status != ALGORITHM_FINISHED);
        }
        catch (thrust::system_error &e)
        {
            cout << "Algorithm failed (" << e.what() << ")" << endl;
            exit(EXIT_FAILURE);
        }

        cudaEventRecord(stop, 0);
        cudaEventSynchronize(stop);

        float elapsedTime;
        cudaEventElapsedTime(&elapsedTime, start, stop);

        return elapsedTime;
    }

private:
    void printMemoryUsage()
    {
        size_t availableMemory, totalMemory, usedMemory;

        cudaMemGetInfo(&availableMemory, &totalMemory);
        usedMemory = totalMemory - availableMemory;

        cout << "Device memory: used " << usedMemory
             << " available " << availableMemory
             << " total " << totalMemory << endl;
    }

    struct MemoryPoolsCollection
    {
        DeviceMemoryPool<uint> uintVertices;
        DeviceMemoryPool<float> floatVertices;
        DeviceMemoryPool<uint> uintEdges;
        DeviceMemoryPool<float> floatEdges;
    };

    static const uint kUintVerticesPoolsRequired = 8;
    static const uint kFloatVerticesPoolsRequired = 3;
    static const uint kUintEdgesPoolsRequired = 8;
    static const uint kFloatEdgesPoolsRequired = 4;

    void initializeData(const Graph &graph, MemoryPoolsCollection &pools)
    {
        // Get memory for the internal variables.
        verticesCount_ = static_cast<uint>(graph.vertices.size());
        edgesCount_ = static_cast<uint>(graph.edges.size());

        dVertices_ = pools.uintVertices.get();
        dEdges_ = pools.uintEdges.get();
        dWeights_ = pools.floatEdges.get();

        dOutputEdgesFlags_ = pools.uintEdges.get();

        // Copy the graph to device memory.
        checkCudaErrors(cudaMemcpy(dVertices_.get(),
                                   &(graph.vertices[0]),
                                   sizeof(uint) * verticesCount_,
                                   cudaMemcpyHostToDevice));
        checkCudaErrors(cudaMemcpy(dEdges_.get(),
                                   &(graph.edges[0]),
                                   sizeof(uint) * edgesCount_,
                                   cudaMemcpyHostToDevice));
        checkCudaErrors(cudaMemcpy(dWeights_.get(),
                                   &(graph.weights[0]),
                                   sizeof(float) * edgesCount_,
                                   cudaMemcpyHostToDevice));

        thrust::fill(dOutputEdgesFlags_,
                     dOutputEdgesFlags_ + edgesCount_,
                     0);
    }

    static const uint kMaxThreadsPerBlock = 256;

    // Calculates the grid parameters of the consecutive kernel calls
    // based on the number of elements in the array.
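    //
    // For example, with kMaxThreadsPerBlock == 256:
    //   totalElements == 100  -> 1 block of 100 threads;
    //   totalElements == 1000 -> 4 blocks of 256 threads (24 threads idle).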
    void calculateThreadsDistribution(uint totalElements,
                                      uint &blocksCount,
                                      uint &threadsPerBlockCount)
    {
        if (totalElements > kMaxThreadsPerBlock)
        {
            blocksCount =
                (totalElements + kMaxThreadsPerBlock - 1) /
                kMaxThreadsPerBlock;
            threadsPerBlockCount = kMaxThreadsPerBlock;
        }
        else
        {
            blocksCount = 1;
            threadsPerBlockCount = totalElements;
        }
    }

    enum AlgorithmStatus
    {
        ALGORITHM_NOT_FINISHED,
        ALGORITHM_FINISHED
    };

    AlgorithmStatus invokeStep(MemoryPoolsCollection &pools,
                               Pyramid &segmentations)
    {
        uint blocksCount, threadsPerBlockCount;

        calculateThreadsDistribution(edgesCount_,
                                     blocksCount,
                                     threadsPerBlockCount);

        dim3 gridDimsForEdges(blocksCount, 1, 1);
        dim3 blockDimsForEdges(threadsPerBlockCount, 1, 1);

        calculateThreadsDistribution(verticesCount_,
                                     blocksCount,
                                     threadsPerBlockCount);

        dim3 gridDimsForVertices(blocksCount, 1, 1);
        dim3 blockDimsForVertices(threadsPerBlockCount, 1, 1);

        thrust::device_ptr<uint> dEdgesFlags = pools.uintEdges.get();

        thrust::fill(dEdgesFlags, dEdgesFlags + edgesCount_, 0);

        // Mark the first edge of each vertex in "dEdgesFlags".
        markSegments<<< gridDimsForVertices, blockDimsForVertices, 0 >>>
            (dVertices_.get(), dEdgesFlags.get(), verticesCount_);
        getLastCudaError("markSegments launch failed.");

        // Now find the minimum edge for each vertex.
        thrust::device_ptr<uint> dMinScannedEdges = pools.uintEdges.get();
        thrust::device_ptr<float> dMinScannedWeights = pools.floatEdges.get();

        thrust::inclusive_scan_by_key(
            dEdgesFlags,
            dEdgesFlags + edgesCount_,
            thrust::make_zip_iterator(
                thrust::make_tuple(dWeights_, dEdges_)),
            thrust::make_zip_iterator(
                thrust::make_tuple(dMinScannedWeights, dMinScannedEdges)),
            thrust::greater_equal<uint>(),
            thrust::minimum< thrust::tuple<float, uint> >());

        // To make things clear: let "dEdgesFlags" denote groups of edges
        // that correspond to the same vertex. Then the last edge of each
        // group (in "dMinScannedEdges" and "dMinScannedWeights") is now
        // minimal.

        // Calculate a successor vertex for each vertex. The successor of
        // vertex v is the neighbouring vertex connected to v by the
        // minimal edge.
        thrust::device_ptr<uint> dSuccessors = pools.uintVertices.get();

        getSuccessors<<< gridDimsForVertices, blockDimsForVertices, 0 >>>
            (dVertices_.get(),
             dMinScannedEdges.get(),
             dSuccessors.get(),
             verticesCount_,
             edgesCount_);
        getLastCudaError("getSuccessors launch failed.");

        pools.uintEdges.put(dMinScannedEdges);
        pools.floatEdges.put(dMinScannedWeights);

        // Remove cyclic successor dependencies. Note that there can be
        // only two vertices in a cycle. See [1] for details.
        removeCycles<<< gridDimsForVertices, blockDimsForVertices, 0 >>>
            (dSuccessors.get(), verticesCount_);
        getLastCudaError("removeCycles launch failed.");

        // Build an array of startpoints for the edges. As already stated,
        // each group of edges denoted by "dEdgesFlags" has the same
        // startpoint.
        thrust::device_ptr<uint> dStartpoints = pools.uintEdges.get();

        thrust::inclusive_scan(dEdgesFlags,
                               dEdgesFlags + edgesCount_,
                               dStartpoints);

        addScalar<<< gridDimsForEdges, blockDimsForEdges, 0 >>>
            (dStartpoints.get(), -1, edgesCount_);
        getLastCudaError("addScalar launch failed.");

        // Shrink the chains of successors. The new successors will
        // eventually represent the superpixels of the new level.
        thrust::device_ptr<uint> dRepresentatives = pools.uintVertices.get();

        getRepresentatives<<< gridDimsForVertices, blockDimsForVertices, 0 >>>
            (dSuccessors.get(), dRepresentatives.get(), verticesCount_);
        getLastCudaError("getRepresentatives launch failed.");

        swap(dSuccessors, dRepresentatives);

        pools.uintVertices.put(dRepresentatives);

        // Group vertices by their successors' indices.
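        // For example (hypothetical data for four vertices):
        //   dSuccessors           = {2, 2, 0, 0}
        // together with the identity sequence {0, 1, 2, 3} sorts to
        //   dSuccessors           = {0, 0, 2, 2}
        //   dClusteredVerticesIDs = {2, 3, 0, 1}
        // so vertices sharing a successor become adjacent.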
        thrust::device_ptr<uint> dClusteredVerticesIDs =
            pools.uintVertices.get();

        thrust::sequence(dClusteredVerticesIDs,
                         dClusteredVerticesIDs + verticesCount_);

        thrust::sort(
            thrust::make_zip_iterator(
                thrust::make_tuple(dSuccessors, dClusteredVerticesIDs)),
            thrust::make_zip_iterator(
                thrust::make_tuple(dSuccessors + verticesCount_,
                                   dClusteredVerticesIDs + verticesCount_)));

        // Mark those groups.
        thrust::device_ptr<uint> dVerticesFlags_ = pools.uintVertices.get();

        thrust::fill(dVerticesFlags_, dVerticesFlags_ + verticesCount_, 0);

        thrust::adjacent_difference(dSuccessors,
                                    dSuccessors + verticesCount_,
                                    dVerticesFlags_,
                                    thrust::not_equal_to<uint>());

        // adjacent_difference copies the first element verbatim (it is a
        // successor ID, not a flag), so reset the first flag to zero.
        cudaMemset((void *) dVerticesFlags_.get(), 0, sizeof(uint));

        // Assign new indices to the successors (the indices of vertices
        // at the new level).
        thrust::device_ptr<uint> dNewVerticesIDs_ = pools.uintVertices.get();

        thrust::inclusive_scan(dVerticesFlags_,
                               dVerticesFlags_ + verticesCount_,
                               dNewVerticesIDs_);

        pools.uintVertices.put(dVerticesFlags_);

        // Now we can easily calculate the number of resulting superpixels.
        uint newVerticesCount;

        cudaMemcpy(&newVerticesCount,
                   (dNewVerticesIDs_ + verticesCount_ - 1).get(),
                   sizeof(uint),
                   cudaMemcpyDeviceToHost);
        ++newVerticesCount;

        // There are two special cases in which we can stop the algorithm:
        // 1) the number of vertices in the graph remained unchanged;
        // 2) only one vertex remains.
        if (newVerticesCount == verticesCount_)
        {
            return ALGORITHM_FINISHED;
        }
        else if (newVerticesCount == 1)
        {
            thrust::device_ptr<uint> dDummyVerticesOffsets =
                pools.uintVertices.get();

            cudaMemset((void *) dDummyVerticesOffsets.get(), 0, sizeof(uint));

            thrust::device_ptr<uint> dDummyVerticesIDs =
                pools.uintVertices.get();

            thrust::sequence(dDummyVerticesIDs,
                             dDummyVerticesIDs + verticesCount_);

            segmentations.addLevel(1,
                                   verticesCount_,
                                   dDummyVerticesOffsets,
                                   dDummyVerticesIDs);

            return ALGORITHM_FINISHED;
        }

        // Calculate how old vertex IDs map to new vertex IDs.
        thrust::device_ptr<uint> dVerticesMapping = pools.uintVertices.get();

        getVerticesMapping<<< gridDimsForVertices, blockDimsForVertices, 0 >>>
            (dClusteredVerticesIDs.get(),
             dNewVerticesIDs_.get(),
             dVerticesMapping.get(),
             verticesCount_);
        getLastCudaError("getVerticesMapping launch failed.");

        pools.uintVertices.put(dNewVerticesIDs_);
        pools.uintVertices.put(dClusteredVerticesIDs);
        pools.uintVertices.put(dSuccessors);

        // Invalidate self-loops in the reduced graph (the graph produced
        // by merging all old vertices that have the same successor).
        invalidateLoops<<< gridDimsForEdges, blockDimsForEdges, 0 >>>
            (dStartpoints.get(),
             dVerticesMapping.get(),
             dEdges_.get(),
             edgesCount_);
        getLastCudaError("invalidateLoops launch failed.");

        // Calculate various information about the surviving edges (their
        // new startpoint IDs and edge IDs) and the non-surviving/contracted
        // edges (their weights).
        thrust::device_ptr<uint> dNewStartpoints = pools.uintEdges.get();
        thrust::device_ptr<uint> dSurvivedEdgesIDs = pools.uintEdges.get();

        calculateEdgesInfo<<< gridDimsForEdges, blockDimsForEdges, 0 >>>
            (dStartpoints.get(),
             dVerticesMapping.get(),
             dEdges_.get(),
             dWeights_.get(),
             dNewStartpoints.get(),
             dSurvivedEdgesIDs.get(),
             edgesCount_,
             newVerticesCount);
        getLastCudaError("calculateEdgesInfo launch failed.");

        pools.uintEdges.put(dStartpoints);

        // Group that information by the new startpoint IDs. Keep in mind
        // that we want to build the new (reduced) graph and apply the next
        // step of the algorithm to it.
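        //
        // For example (hypothetical data, newVerticesCount == 2):
        //   dNewStartpoints = {1, 0, 2, 0}   (2 marks a contracted edge)
        // sorts below to
        //   dNewStartpoints = {0, 0, 1, 2}
        // so surviving edges are grouped by startpoint and contracted ones
        // (IDs >= newVerticesCount) sink to the end, where find_if can
        // locate them.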
        // Hence we need to preserve the structure of the original graph:
        // neighbours and weights should be grouped by vertex.
        thrust::sort(
            thrust::make_zip_iterator(
                thrust::make_tuple(dNewStartpoints, dSurvivedEdgesIDs)),
            thrust::make_zip_iterator(
                thrust::make_tuple(dNewStartpoints + edgesCount_,
                                   dSurvivedEdgesIDs + edgesCount_)));

        // Find the group of contracted edges.
        uint *invalidEdgesPtr =
            thrust::find_if(dNewStartpoints,
                            dNewStartpoints + edgesCount_,
                            IsGreaterEqualThan<uint>(newVerticesCount)).get();

        // Calculate how many edges there are in the reduced graph.
        uint validEdgesCount =
            static_cast<uint>(invalidEdgesPtr - dNewStartpoints.get());

        // Mark the groups of edges that correspond to the same vertex in
        // the reduced graph.
        thrust::adjacent_difference(dNewStartpoints,
                                    dNewStartpoints + edgesCount_,
                                    dEdgesFlags,
                                    thrust::not_equal_to<uint>());

        // Zero the first flag, then set its lowest byte to 1 so that the
        // very first edge starts a new group.
        cudaMemset((void *) dEdgesFlags.get(), 0, sizeof(uint));
        cudaMemset((void *) dEdgesFlags.get(), 1, 1);

        pools.uintEdges.put(dNewStartpoints);

        // Now we are able to build the reduced graph. See the "Graph"
        // class for details on the graph's internal structure.

        // Calculate the vertices' offsets for the reduced graph.
        thrust::copy_if(thrust::make_counting_iterator(0U),
                        thrust::make_counting_iterator(validEdgesCount),
                        dEdgesFlags,
                        dVertices_,
                        thrust::identity<uint>());

        pools.uintEdges.put(dEdgesFlags);

        // Build the neighbourhood of each vertex in the reduced graph
        // (this includes recalculating the edges' weights).
        calculateThreadsDistribution(validEdgesCount,
                                     blocksCount,
                                     threadsPerBlockCount);

        dim3 newGridDimsForEdges(blocksCount, 1, 1);
        dim3 newBlockDimsForEdges(threadsPerBlockCount, 1, 1);

        thrust::device_ptr<uint> dNewEdges = pools.uintEdges.get();
        thrust::device_ptr<float> dNewWeights = pools.floatEdges.get();

        makeNewEdges<<< newGridDimsForEdges, newBlockDimsForEdges, 0 >>>
            (dSurvivedEdgesIDs.get(),
             dVerticesMapping.get(),
             dEdges_.get(),
             dWeights_.get(),
             dNewEdges.get(),
             dNewWeights.get(),
             validEdgesCount);
        getLastCudaError("makeNewEdges launch failed.");

        swap(dEdges_, dNewEdges);
        swap(dWeights_, dNewWeights);

        pools.uintEdges.put(dNewEdges);
        pools.floatEdges.put(dNewWeights);

        pools.uintEdges.put(dSurvivedEdgesIDs);

        // The graph's reconstruction is now finished.

        // Build the new level of the segmentation tree. This is a trivial
        // task, as "dVerticesMapping" already contains all the necessary
        // information about the vertices' transformations.
        thrust::device_ptr<uint> dVerticesIDs = pools.uintVertices.get();
        thrust::device_ptr<uint> dNewVerticesOffsets =
            pools.uintVertices.get();

        thrust::sequence(dVerticesIDs, dVerticesIDs + verticesCount_);
        thrust::sort_by_key(dVerticesMapping,
                            dVerticesMapping + verticesCount_,
                            dVerticesIDs);
        thrust::unique_by_key_copy(dVerticesMapping,
                                   dVerticesMapping + verticesCount_,
                                   thrust::make_counting_iterator(0),
                                   thrust::make_discard_iterator(),
                                   dNewVerticesOffsets);

        segmentations.addLevel(newVerticesCount,
                               verticesCount_,
                               dNewVerticesOffsets,
                               dVerticesIDs);

        pools.uintVertices.put(dVerticesIDs);
        pools.uintVertices.put(dNewVerticesOffsets);

        pools.uintVertices.put(dVerticesMapping);

        // We can now safely set the new counts of vertices and edges.
        verticesCount_ = newVerticesCount;
        edgesCount_ = validEdgesCount;

        return ALGORITHM_NOT_FINISHED;
    }

    uint verticesCount_;
    uint edgesCount_;

    thrust::device_ptr<uint> dVertices_;
    thrust::device_ptr<uint> dEdges_;
    thrust::device_ptr<float> dWeights_;

    thrust::device_ptr<uint> dOutputEdgesFlags_;
};

// Loads a PPM image.
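//
// Note: a P6 PPM stores each pixel as three interleaved bytes (R, G, B),
// which is why the width * height pixels returned by __loadPPM are
// reinterpreted below as uchar3 elements.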
int loadImage(const char *filename,
              const char *executablePath,
              vector<uchar3> &data,
              uint &width,
              uint &height)
{
    const char *imagePath = sdkFindFilePath(filename, executablePath);

    if (imagePath == NULL)
    {
        return -1;
    }

    uchar *dataHandle = NULL;
    unsigned int channels;

    if (!__loadPPM(imagePath, &dataHandle, &width, &height, &channels))
    {
        return -1;
    }

    data.assign(reinterpret_cast<uchar3 *>(dataHandle),
                reinterpret_cast<uchar3 *>(dataHandle) + width * height);

    free(reinterpret_cast<void *>(dataHandle));

    return 0;
}

inline float distance(const uchar3 &first, const uchar3 &second)
{
    int dx = static_cast<int>(first.x) - static_cast<int>(second.x);
    int dy = static_cast<int>(first.y) - static_cast<int>(second.y);
    int dz = static_cast<int>(first.z) - static_cast<int>(second.z);

    uint sqrResult = dx * dx + dy * dy + dz * dz;

    return sqrt(static_cast<float>(sqrResult));
}

// Builds a net-graph for the image with 4-connected pixels.
void buildGraph(const vector<uchar3> &image,
                uint width,
                uint height,
                Graph &graph)
{
    uint totalNodes = static_cast<uint>(image.size());
    uint totalEdges = 4 * totalNodes - 2 * (width + height);

    graph.vertices.resize(totalNodes);
    graph.edges.reserve(totalEdges);
    graph.weights.reserve(totalEdges);

    uint edgesProcessed = 0;

    for (uint y = 0; y < height; ++y)
    {
        for (uint x = 0; x < width; ++x)
        {
            uint nodeIndex = y * width + x;
            const uchar3 &centerPixel = image[nodeIndex];

            graph.vertices[nodeIndex] = edgesProcessed;

            if (y > 0)
            {
                uint lowerNodeIndex = (y - 1) * width + x;
                const uchar3 &lowerPixel = image[lowerNodeIndex];

                graph.edges.push_back(lowerNodeIndex);
                graph.weights.push_back(distance(centerPixel, lowerPixel));

                ++edgesProcessed;
            }

            if (y + 1 < height)
            {
                uint upperNodeIndex = (y + 1) * width + x;
                const uchar3 &upperPixel = image[upperNodeIndex];

                graph.edges.push_back(upperNodeIndex);
                graph.weights.push_back(distance(centerPixel, upperPixel));

                ++edgesProcessed;
            }

            if (x > 0)
            {
                uint leftNodeIndex = y * width + x - 1;
                const uchar3 &leftPixel = image[leftNodeIndex];

                graph.edges.push_back(leftNodeIndex);
                graph.weights.push_back(distance(centerPixel, leftPixel));

                ++edgesProcessed;
            }

            if (x + 1 < width)
            {
                uint rightNodeIndex = y * width + x + 1;
                const uchar3 &rightPixel = image[rightNodeIndex];

                graph.edges.push_back(rightNodeIndex);
                graph.weights.push_back(distance(centerPixel, rightPixel));

                ++edgesProcessed;
            }
        }
    }
}

static const char *kDefaultImageName = "test.ppm";

int main(int argc, char **argv)
{
    vector<uchar3> image;
    uint imageWidth, imageHeight;
    char *imageName;

    printf("%s Starting...\n\n", argv[0]);

    imageName = (char *) kDefaultImageName;

    if (checkCmdLineFlag(argc, (const char **) argv, "file"))
    {
        getCmdLineArgumentString(argc,
                                 (const char **) argv,
                                 "file",
                                 &imageName);
    }

    if (loadImage(imageName, argv[0], image, imageWidth, imageHeight) != 0)
    {
        printf("Failed to open <%s>, program exit...\n", imageName);
        exit(EXIT_FAILURE);
    }

    findCudaDevice(argc, (const char **) argv);

    Graph graph;
    buildGraph(image, imageWidth, imageHeight, graph);

    Pyramid segmentations;

    cout << "* Building segmentation tree... ";
    cout.flush();

    SegmentationTreeBuilder algo;
    float elapsedTime = algo.run(graph, segmentations);

    cout << "done in " << elapsedTime << " (ms)" << endl;

    cout << "* Dumping levels for each tree..." << endl << endl;
    segmentations.dump(imageWidth, imageHeight);

    bool bResults[2];

    bResults[0] = sdkComparePPM("level_00.ppm",
                                sdkFindFilePath("ref_00.ppm", argv[0]),
                                5.0f,
                                0.15f,
                                false);
    bResults[1] = sdkComparePPM("level_09.ppm",
                                sdkFindFilePath("ref_09.ppm", argv[0]),
                                5.0f,
                                0.15f,
                                false);

    exit((bResults[0] && bResults[1]) ? EXIT_SUCCESS : EXIT_FAILURE);
}
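// Example invocation (hypothetical executable name; the "file" flag is
// parsed by the sample helper library):
//
//   ./segmentationTree --file=test.ppm
//
// This writes one image per level of the tree (level_00.ppm,
// level_01.ppm, ...) and compares two of them against the reference
// images ref_00.ppm and ref_09.ppm.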