mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2024-11-28 18:49:14 +08:00
326 lines
12 KiB
Plaintext
326 lines
12 KiB
Plaintext
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* * Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* * Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
|
* contributors may be used to endorse or promote products derived
|
|
* from this software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
|
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
|
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
/* Computation of eigenvalues of symmetric, tridiagonal matrix using
|
|
* bisection.
|
|
*/
|
|
|
|
// includes, system
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <math.h>
|
|
#include <float.h>
|
|
#include <assert.h>
|
|
|
|
// includes, project
|
|
#include <helper_functions.h>
|
|
#include <helper_cuda.h>
|
|
#include "config.h"
|
|
#include "structs.h"
|
|
#include "matlab.h"
|
|
#include "util.h"
|
|
#include "gerschgorin.h"
|
|
|
|
#include "bisect_small.cuh"
|
|
#include "bisect_large.cuh"
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
// declaration, forward
|
|
bool runTest(int argc, char **argv);
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
// Program main
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
int main(int argc, char **argv) {
|
|
bool bQAResults = false;
|
|
|
|
printf("Starting eigenvalues\n");
|
|
|
|
bQAResults = runTest(argc, argv);
|
|
printf("Test %s\n", bQAResults ? "Succeeded!" : "Failed!");
|
|
|
|
exit(bQAResults ? EXIT_SUCCESS : EXIT_FAILURE);
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
//! Initialize the input data to the algorithm
|
|
//! @param input handles to the input data
|
|
//! @param exec_path path where executable is run (argv[0])
|
|
//! @param mat_size size of the matrix
|
|
//! @param user_defined 1 if the matrix size has been requested by the user,
|
|
//! 0 if the default size
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
void initInputData(InputData &input, char *exec_path,
|
|
const unsigned int mat_size,
|
|
const unsigned int user_defined) {
|
|
// allocate memory
|
|
input.a = (float *)malloc(sizeof(float) * mat_size);
|
|
input.b = (float *)malloc(sizeof(float) * mat_size);
|
|
|
|
if (1 == user_defined) {
|
|
// initialize diagonal and superdiagonal entries with random values
|
|
srand(278217421);
|
|
|
|
// srand( clock());
|
|
for (unsigned int i = 0; i < mat_size; ++i) {
|
|
input.a[i] = (float)(2.0 * (((double)rand() / (double)RAND_MAX) - 0.5));
|
|
input.b[i] = (float)(2.0 * (((double)rand() / (double)RAND_MAX) - 0.5));
|
|
}
|
|
|
|
// the first element of s is used as padding on the device (thus the
|
|
// whole vector is copied to the device but the kernels are launched
|
|
// with (s+1) as start address
|
|
input.b[0] = 0.0f;
|
|
} else {
|
|
// read default matrix
|
|
unsigned int input_data_size = mat_size;
|
|
char *diag_path = sdkFindFilePath("diagonal.dat", exec_path);
|
|
assert(NULL != diag_path);
|
|
sdkReadFile(diag_path, &(input.a), &input_data_size, false);
|
|
|
|
char *sdiag_path = sdkFindFilePath("superdiagonal.dat", exec_path);
|
|
assert(NULL != sdiag_path);
|
|
sdkReadFile(sdiag_path, &(input.b), &input_data_size, false);
|
|
|
|
free(diag_path);
|
|
free(sdiag_path);
|
|
}
|
|
|
|
// allocate device memory for input
|
|
checkCudaErrors(cudaMalloc((void **)&(input.g_a), sizeof(float) * mat_size));
|
|
checkCudaErrors(
|
|
cudaMalloc((void **)&(input.g_b_raw), sizeof(float) * mat_size));
|
|
|
|
// copy data to device
|
|
checkCudaErrors(cudaMemcpy(input.g_a, input.a, sizeof(float) * mat_size,
|
|
cudaMemcpyHostToDevice));
|
|
checkCudaErrors(cudaMemcpy(input.g_b_raw, input.b, sizeof(float) * mat_size,
|
|
cudaMemcpyHostToDevice));
|
|
|
|
input.g_b = input.g_b_raw + 1;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
//! Clean up input data, in particular allocated memory
|
|
//! @param input handles to the input data
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
void cleanupInputData(InputData &input) {
|
|
freePtr(input.a);
|
|
freePtr(input.b);
|
|
|
|
checkCudaErrors(cudaFree(input.g_a));
|
|
input.g_a = NULL;
|
|
checkCudaErrors(cudaFree(input.g_b_raw));
|
|
input.g_b_raw = NULL;
|
|
input.g_b = NULL;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
//! Check if a specific matrix size has to be used
|
|
//! @param argc number of command line arguments (from main(argc, argv)
|
|
//! @param argv pointers to command line arguments (from main(argc, argv)
|
|
//! @param matrix_size size of matrix, updated if specific size specified on
|
|
//! command line
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
void getMatrixSize(int argc, char **argv, unsigned int &mat_size,
|
|
unsigned int &user_defined) {
|
|
int temp = -1;
|
|
|
|
if (checkCmdLineFlag(argc, (const char **)argv, "matrix-size")) {
|
|
temp = getCmdLineArgumentInt(argc, (const char **)argv, "matrix-size");
|
|
}
|
|
|
|
if (temp > 0) {
|
|
mat_size = (unsigned int)temp;
|
|
// data type short is used in the kernel
|
|
assert(mat_size < (1 << 16));
|
|
|
|
// mat_size should be large than 2
|
|
assert(mat_size >= 2);
|
|
|
|
user_defined = 1;
|
|
}
|
|
|
|
printf("Matrix size: %i x %i\n", mat_size, mat_size);
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
//! Check if a specific precision of the eigenvalue has to be obtained
|
|
//! @param argc number of command line arguments (from main(argc, argv)
|
|
//! @param argv pointers to command line arguments (from main(argc, argv)
|
|
//! @param iters_timing numbers of iterations for timing, updated if a
|
|
//! specific number is specified on the command line
|
|
//! @param user_defined 1 if the precision has been requested by the user,
|
|
//! 0 if the default size
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
void getPrecision(int argc, char **argv, float &precision,
|
|
unsigned int &user_defined) {
|
|
float temp = -1.0f;
|
|
|
|
if (checkCmdLineFlag(argc, (const char **)argv, "precision")) {
|
|
temp = getCmdLineArgumentFloat(argc, (const char **)argv, "precision");
|
|
printf("Precision is between [0.001, 0.000001]\n");
|
|
}
|
|
|
|
if (temp > 1e-6 && temp <= 0.001) {
|
|
precision = temp;
|
|
user_defined = 1;
|
|
}
|
|
|
|
printf("Precision: %f\n", precision);
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
//! Check if a particular number of iterations for timings has to be used
|
|
//! @param argc number of command line arguments (from main(argc, argv)
|
|
//! @param argv pointers to command line arguments (from main(argc, argv)
|
|
//! @param iters_timing number of timing iterations, updated if user
|
|
//! specific value
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
void getItersTiming(int argc, char **argv, unsigned int &iters_timing) {
|
|
int temp = -1;
|
|
|
|
if (checkCmdLineFlag(argc, (const char **)argv, "iters-timing")) {
|
|
temp = getCmdLineArgumentInt(argc, (const char **)argv, "iters-timing");
|
|
}
|
|
|
|
if (temp > 0) {
|
|
iters_timing = temp;
|
|
}
|
|
|
|
printf("Iterations to be timed: %i\n", iters_timing);
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
//! Check if a particular filename has to be used for the file where the result
|
|
//! is stored
|
|
//! @param argc number of command line arguments (from main(argc, argv)
|
|
//! @param argv pointers to command line arguments (from main(argc, argv)
|
|
//! @param filename filename of result file, updated if user specified
|
|
//! filename
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
void getResultFilename(int argc, char **argv, char *&filename) {
|
|
char *temp = NULL;
|
|
getCmdLineArgumentString(argc, (const char **)argv, "filename-result", &temp);
|
|
|
|
if (NULL != temp) {
|
|
filename = (char *)malloc(sizeof(char) * strlen(temp));
|
|
strcpy(filename, temp);
|
|
|
|
free(temp);
|
|
}
|
|
|
|
printf("Result filename: '%s'\n", filename);
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
//! Run a simple test for CUDA
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
bool runTest(int argc, char **argv) {
|
|
bool bCompareResult = false;
|
|
|
|
findCudaDevice(argc, (const char **)argv);
|
|
|
|
StopWatchInterface *timer = NULL;
|
|
StopWatchInterface *timer_total = NULL;
|
|
sdkCreateTimer(&timer);
|
|
sdkCreateTimer(&timer_total);
|
|
|
|
// default
|
|
unsigned int mat_size = 2048;
|
|
// flag if the matrix size is due to explicit user request
|
|
unsigned int user_defined = 0;
|
|
// desired precision of eigenvalues
|
|
float precision = 0.00001f;
|
|
unsigned int iters_timing = 100;
|
|
char *result_file = (char *)"eigenvalues.dat";
|
|
|
|
// check if there is a command line request for the matrix size
|
|
getMatrixSize(argc, argv, mat_size, user_defined);
|
|
|
|
// check if user requested specific precision
|
|
getPrecision(argc, argv, precision, user_defined);
|
|
|
|
// check if user requested specific number of iterations for timing
|
|
getItersTiming(argc, argv, iters_timing);
|
|
|
|
// file name for result file
|
|
getResultFilename(argc, argv, result_file);
|
|
|
|
// set up input
|
|
InputData input;
|
|
initInputData(input, argv[0], mat_size, user_defined);
|
|
|
|
// compute Gerschgorin interval
|
|
float lg = FLT_MAX;
|
|
float ug = -FLT_MAX;
|
|
computeGerschgorin(input.a, input.b + 1, mat_size, lg, ug);
|
|
printf("Gerschgorin interval: %f / %f\n", lg, ug);
|
|
|
|
// two kernels, for small matrices a lot of overhead can be avoided
|
|
if (mat_size <= MAX_SMALL_MATRIX) {
|
|
// initialize memory for result
|
|
ResultDataSmall result;
|
|
initResultSmallMatrix(result, mat_size);
|
|
|
|
// run the kernel
|
|
computeEigenvaluesSmallMatrix(input, result, mat_size, lg, ug, precision,
|
|
iters_timing);
|
|
|
|
// get the result from the device and do some sanity checks,
|
|
// save the result
|
|
processResultSmallMatrix(input, result, mat_size, result_file);
|
|
|
|
// clean up
|
|
cleanupResultSmallMatrix(result);
|
|
|
|
printf("User requests non-default argument(s), skipping self-check!\n");
|
|
bCompareResult = true;
|
|
} else {
|
|
// initialize memory for result
|
|
ResultDataLarge result;
|
|
initResultDataLargeMatrix(result, mat_size);
|
|
|
|
// run the kernel
|
|
computeEigenvaluesLargeMatrix(input, result, mat_size, precision, lg, ug,
|
|
iters_timing);
|
|
|
|
// get the result from the device and do some sanity checks
|
|
// save the result if user specified matrix size
|
|
bCompareResult = processResultDataLargeMatrix(
|
|
input, result, mat_size, result_file, user_defined, argv[0]);
|
|
|
|
// cleanup
|
|
cleanupResultDataLargeMatrix(result);
|
|
}
|
|
|
|
cleanupInputData(input);
|
|
|
|
return bCompareResult;
|
|
}
|