cuda-samples/Samples/2_Concepts_and_Techniques/eigenvalues/main.cu
2022-01-13 11:35:24 +05:30

326 lines
12 KiB
Plaintext

/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Computation of eigenvalues of symmetric, tridiagonal matrix using
* bisection.
*/
// includes, system
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <float.h>
#include <assert.h>
// includes, project
#include <helper_functions.h>
#include <helper_cuda.h>
#include "config.h"
#include "structs.h"
#include "matlab.h"
#include "util.h"
#include "gerschgorin.h"
#include "bisect_small.cuh"
#include "bisect_large.cuh"
////////////////////////////////////////////////////////////////////////////////
// declaration, forward
bool runTest(int argc, char **argv);
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int main(int argc, char **argv) {
bool bQAResults = false;
printf("Starting eigenvalues\n");
bQAResults = runTest(argc, argv);
printf("Test %s\n", bQAResults ? "Succeeded!" : "Failed!");
exit(bQAResults ? EXIT_SUCCESS : EXIT_FAILURE);
}
////////////////////////////////////////////////////////////////////////////////
//! Initialize the input data to the algorithm
//! @param input handles to the input data
//! @param exec_path path where executable is run (argv[0])
//! @param mat_size size of the matrix
//! @param user_defined 1 if the matrix size has been requested by the user,
//! 0 if the default size
////////////////////////////////////////////////////////////////////////////////
void initInputData(InputData &input, char *exec_path,
const unsigned int mat_size,
const unsigned int user_defined) {
// allocate memory
input.a = (float *)malloc(sizeof(float) * mat_size);
input.b = (float *)malloc(sizeof(float) * mat_size);
if (1 == user_defined) {
// initialize diagonal and superdiagonal entries with random values
srand(278217421);
// srand( clock());
for (unsigned int i = 0; i < mat_size; ++i) {
input.a[i] = (float)(2.0 * (((double)rand() / (double)RAND_MAX) - 0.5));
input.b[i] = (float)(2.0 * (((double)rand() / (double)RAND_MAX) - 0.5));
}
// the first element of s is used as padding on the device (thus the
// whole vector is copied to the device but the kernels are launched
// with (s+1) as start address
input.b[0] = 0.0f;
} else {
// read default matrix
unsigned int input_data_size = mat_size;
char *diag_path = sdkFindFilePath("diagonal.dat", exec_path);
assert(NULL != diag_path);
sdkReadFile(diag_path, &(input.a), &input_data_size, false);
char *sdiag_path = sdkFindFilePath("superdiagonal.dat", exec_path);
assert(NULL != sdiag_path);
sdkReadFile(sdiag_path, &(input.b), &input_data_size, false);
free(diag_path);
free(sdiag_path);
}
// allocate device memory for input
checkCudaErrors(cudaMalloc((void **)&(input.g_a), sizeof(float) * mat_size));
checkCudaErrors(
cudaMalloc((void **)&(input.g_b_raw), sizeof(float) * mat_size));
// copy data to device
checkCudaErrors(cudaMemcpy(input.g_a, input.a, sizeof(float) * mat_size,
cudaMemcpyHostToDevice));
checkCudaErrors(cudaMemcpy(input.g_b_raw, input.b, sizeof(float) * mat_size,
cudaMemcpyHostToDevice));
input.g_b = input.g_b_raw + 1;
}
////////////////////////////////////////////////////////////////////////////////
//! Clean up input data, in particular allocated memory
//! @param input handles to the input data
////////////////////////////////////////////////////////////////////////////////
void cleanupInputData(InputData &input) {
freePtr(input.a);
freePtr(input.b);
checkCudaErrors(cudaFree(input.g_a));
input.g_a = NULL;
checkCudaErrors(cudaFree(input.g_b_raw));
input.g_b_raw = NULL;
input.g_b = NULL;
}
////////////////////////////////////////////////////////////////////////////////
//! Check if a specific matrix size has to be used
//! @param argc number of command line arguments (from main(argc, argv)
//! @param argv pointers to command line arguments (from main(argc, argv)
//! @param matrix_size size of matrix, updated if specific size specified on
//! command line
////////////////////////////////////////////////////////////////////////////////
void getMatrixSize(int argc, char **argv, unsigned int &mat_size,
unsigned int &user_defined) {
int temp = -1;
if (checkCmdLineFlag(argc, (const char **)argv, "matrix-size")) {
temp = getCmdLineArgumentInt(argc, (const char **)argv, "matrix-size");
}
if (temp > 0) {
mat_size = (unsigned int)temp;
// data type short is used in the kernel
assert(mat_size < (1 << 16));
// mat_size should be large than 2
assert(mat_size >= 2);
user_defined = 1;
}
printf("Matrix size: %i x %i\n", mat_size, mat_size);
}
////////////////////////////////////////////////////////////////////////////////
//! Check if a specific precision of the eigenvalue has to be obtained
//! @param argc number of command line arguments (from main(argc, argv)
//! @param argv pointers to command line arguments (from main(argc, argv)
//! @param iters_timing numbers of iterations for timing, updated if a
//! specific number is specified on the command line
//! @param user_defined 1 if the precision has been requested by the user,
//! 0 if the default size
////////////////////////////////////////////////////////////////////////////////
void getPrecision(int argc, char **argv, float &precision,
unsigned int &user_defined) {
float temp = -1.0f;
if (checkCmdLineFlag(argc, (const char **)argv, "precision")) {
temp = getCmdLineArgumentFloat(argc, (const char **)argv, "precision");
printf("Precision is between [0.001, 0.000001]\n");
}
if (temp > 1e-6 && temp <= 0.001) {
precision = temp;
user_defined = 1;
}
printf("Precision: %f\n", precision);
}
////////////////////////////////////////////////////////////////////////////////
//! Check if a particular number of iterations for timings has to be used
//! @param argc number of command line arguments (from main(argc, argv)
//! @param argv pointers to command line arguments (from main(argc, argv)
//! @param iters_timing number of timing iterations, updated if user
//! specific value
////////////////////////////////////////////////////////////////////////////////
void getItersTiming(int argc, char **argv, unsigned int &iters_timing) {
int temp = -1;
if (checkCmdLineFlag(argc, (const char **)argv, "iters-timing")) {
temp = getCmdLineArgumentInt(argc, (const char **)argv, "iters-timing");
}
if (temp > 0) {
iters_timing = temp;
}
printf("Iterations to be timed: %i\n", iters_timing);
}
////////////////////////////////////////////////////////////////////////////////
//! Check if a particular filename has to be used for the file where the result
//! is stored
//! @param argc number of command line arguments (from main(argc, argv)
//! @param argv pointers to command line arguments (from main(argc, argv)
//! @param filename filename of result file, updated if user specified
//! filename
////////////////////////////////////////////////////////////////////////////////
void getResultFilename(int argc, char **argv, char *&filename) {
char *temp = NULL;
getCmdLineArgumentString(argc, (const char **)argv, "filename-result", &temp);
if (NULL != temp) {
filename = (char *)malloc(sizeof(char) * strlen(temp));
strcpy(filename, temp);
free(temp);
}
printf("Result filename: '%s'\n", filename);
}
////////////////////////////////////////////////////////////////////////////////
//! Run a simple test for CUDA
////////////////////////////////////////////////////////////////////////////////
bool runTest(int argc, char **argv) {
bool bCompareResult = false;
findCudaDevice(argc, (const char **)argv);
StopWatchInterface *timer = NULL;
StopWatchInterface *timer_total = NULL;
sdkCreateTimer(&timer);
sdkCreateTimer(&timer_total);
// default
unsigned int mat_size = 2048;
// flag if the matrix size is due to explicit user request
unsigned int user_defined = 0;
// desired precision of eigenvalues
float precision = 0.00001f;
unsigned int iters_timing = 100;
char *result_file = (char *)"eigenvalues.dat";
// check if there is a command line request for the matrix size
getMatrixSize(argc, argv, mat_size, user_defined);
// check if user requested specific precision
getPrecision(argc, argv, precision, user_defined);
// check if user requested specific number of iterations for timing
getItersTiming(argc, argv, iters_timing);
// file name for result file
getResultFilename(argc, argv, result_file);
// set up input
InputData input;
initInputData(input, argv[0], mat_size, user_defined);
// compute Gerschgorin interval
float lg = FLT_MAX;
float ug = -FLT_MAX;
computeGerschgorin(input.a, input.b + 1, mat_size, lg, ug);
printf("Gerschgorin interval: %f / %f\n", lg, ug);
// two kernels, for small matrices a lot of overhead can be avoided
if (mat_size <= MAX_SMALL_MATRIX) {
// initialize memory for result
ResultDataSmall result;
initResultSmallMatrix(result, mat_size);
// run the kernel
computeEigenvaluesSmallMatrix(input, result, mat_size, lg, ug, precision,
iters_timing);
// get the result from the device and do some sanity checks,
// save the result
processResultSmallMatrix(input, result, mat_size, result_file);
// clean up
cleanupResultSmallMatrix(result);
printf("User requests non-default argument(s), skipping self-check!\n");
bCompareResult = true;
} else {
// initialize memory for result
ResultDataLarge result;
initResultDataLargeMatrix(result, mat_size);
// run the kernel
computeEigenvaluesLargeMatrix(input, result, mat_size, precision, lg, ug,
iters_timing);
// get the result from the device and do some sanity checks
// save the result if user specified matrix size
bCompareResult = processResultDataLargeMatrix(
input, result, mat_size, result_file, user_defined, argv[0]);
// cleanup
cleanupResultDataLargeMatrix(result);
}
cleanupInputData(input);
return bCompareResult;
}