mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2025-01-20 03:25:47 +08:00
502 lines
14 KiB
C++
502 lines
14 KiB
C++
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* * Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* * Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
|
* contributors may be used to endorse or promote products derived
|
|
* from this software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
|
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
|
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include <cusolverDn.h>
|
|
#include <math.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
|
|
#include "mmio.h"
|
|
|
|
/* avoid Windows warnings (for example: strcpy, fscanf, etc.) */
|
|
#if defined(_WIN32)
|
|
#define _CRT_SECURE_NO_WARNINGS
|
|
#endif
|
|
|
|
/* various __inline__ __device__ function to initialize a T_ELEM */
|
|
template <typename T_ELEM>
|
|
__inline__ T_ELEM cuGet(int);
|
|
template <>
|
|
__inline__ float cuGet<float>(int x) {
|
|
return float(x);
|
|
}
|
|
|
|
template <>
|
|
__inline__ double cuGet<double>(int x) {
|
|
return double(x);
|
|
}
|
|
|
|
template <>
|
|
__inline__ cuComplex cuGet<cuComplex>(int x) {
|
|
return (make_cuComplex(float(x), 0.0f));
|
|
}
|
|
|
|
template <>
|
|
__inline__ cuDoubleComplex cuGet<cuDoubleComplex>(int x) {
|
|
return (make_cuDoubleComplex(double(x), 0.0));
|
|
}
|
|
|
|
template <typename T_ELEM>
|
|
__inline__ T_ELEM cuGet(int, int);
|
|
template <>
|
|
__inline__ float cuGet<float>(int x, int y) {
|
|
return float(x);
|
|
}
|
|
|
|
template <>
|
|
__inline__ double cuGet<double>(int x, int y) {
|
|
return double(x);
|
|
}
|
|
|
|
template <>
|
|
__inline__ cuComplex cuGet<cuComplex>(int x, int y) {
|
|
return make_cuComplex(float(x), float(y));
|
|
}
|
|
|
|
template <>
|
|
__inline__ cuDoubleComplex cuGet<cuDoubleComplex>(int x, int y) {
|
|
return (make_cuDoubleComplex(double(x), double(y)));
|
|
}
|
|
|
|
template <typename T_ELEM>
|
|
__inline__ T_ELEM cuGet(float);
|
|
template <>
|
|
__inline__ float cuGet<float>(float x) {
|
|
return float(x);
|
|
}
|
|
|
|
template <>
|
|
__inline__ double cuGet<double>(float x) {
|
|
return double(x);
|
|
}
|
|
|
|
template <>
|
|
__inline__ cuComplex cuGet<cuComplex>(float x) {
|
|
return (make_cuComplex(float(x), 0.0f));
|
|
}
|
|
|
|
template <>
|
|
__inline__ cuDoubleComplex cuGet<cuDoubleComplex>(float x) {
|
|
return (make_cuDoubleComplex(double(x), 0.0));
|
|
}
|
|
|
|
template <typename T_ELEM>
|
|
__inline__ T_ELEM cuGet(float, float);
|
|
template <>
|
|
__inline__ float cuGet<float>(float x, float y) {
|
|
return float(x);
|
|
}
|
|
|
|
template <>
|
|
__inline__ double cuGet<double>(float x, float y) {
|
|
return double(x);
|
|
}
|
|
|
|
template <>
|
|
__inline__ cuComplex cuGet<cuComplex>(float x, float y) {
|
|
return (make_cuComplex(float(x), float(y)));
|
|
}
|
|
|
|
template <>
|
|
__inline__ cuDoubleComplex cuGet<cuDoubleComplex>(float x, float y) {
|
|
return (make_cuDoubleComplex(double(x), double(y)));
|
|
}
|
|
|
|
template <typename T_ELEM>
|
|
__inline__ T_ELEM cuGet(double);
|
|
template <>
|
|
__inline__ float cuGet<float>(double x) {
|
|
return float(x);
|
|
}
|
|
|
|
template <>
|
|
__inline__ double cuGet<double>(double x) {
|
|
return double(x);
|
|
}
|
|
|
|
template <>
|
|
__inline__ cuComplex cuGet<cuComplex>(double x) {
|
|
return (make_cuComplex(float(x), 0.0f));
|
|
}
|
|
|
|
template <>
|
|
__inline__ cuDoubleComplex cuGet<cuDoubleComplex>(double x) {
|
|
return (make_cuDoubleComplex(double(x), 0.0));
|
|
}
|
|
|
|
template <typename T_ELEM>
|
|
__inline__ T_ELEM cuGet(double, double);
|
|
template <>
|
|
__inline__ float cuGet<float>(double x, double y) {
|
|
return float(x);
|
|
}
|
|
|
|
template <>
|
|
__inline__ double cuGet<double>(double x, double y) {
|
|
return double(x);
|
|
}
|
|
|
|
template <>
|
|
__inline__ cuComplex cuGet<cuComplex>(double x, double y) {
|
|
return (make_cuComplex(float(x), float(y)));
|
|
}
|
|
|
|
template <>
|
|
__inline__ cuDoubleComplex cuGet<cuDoubleComplex>(double x, double y) {
|
|
return (make_cuDoubleComplex(double(x), double(y)));
|
|
}
|
|
|
|
static void compress_index(const int *Ind, int nnz, int m, int *Ptr, int base) {
|
|
int i;
|
|
|
|
/* initialize everything to zero */
|
|
for (i = 0; i < m + 1; i++) {
|
|
Ptr[i] = 0;
|
|
}
|
|
/* count elements in every row */
|
|
Ptr[0] = base;
|
|
for (i = 0; i < nnz; i++) {
|
|
Ptr[Ind[i] + (1 - base)]++;
|
|
}
|
|
/* add all the values */
|
|
for (i = 0; i < m; i++) {
|
|
Ptr[i + 1] += Ptr[i];
|
|
}
|
|
}
|
|
|
|
struct cooFormat {
|
|
int i;
|
|
int j;
|
|
int p; // permutation
|
|
};
|
|
|
|
int cmp_cooFormat_csr(struct cooFormat *s, struct cooFormat *t) {
|
|
if (s->i < t->i) {
|
|
return -1;
|
|
} else if (s->i > t->i) {
|
|
return 1;
|
|
} else {
|
|
return s->j - t->j;
|
|
}
|
|
}
|
|
|
|
int cmp_cooFormat_csc(struct cooFormat *s, struct cooFormat *t) {
|
|
if (s->j < t->j) {
|
|
return -1;
|
|
} else if (s->j > t->j) {
|
|
return 1;
|
|
} else {
|
|
return s->i - t->i;
|
|
}
|
|
}
|
|
|
|
typedef int (*FUNPTR)(const void *, const void *);
|
|
typedef int (*FUNPTR2)(struct cooFormat *s, struct cooFormat *t);
|
|
|
|
static FUNPTR2 fptr_array[2] = {
|
|
cmp_cooFormat_csr,
|
|
cmp_cooFormat_csc,
|
|
};
|
|
|
|
static int verify_pattern(int m, int nnz, int *csrRowPtr, int *csrColInd) {
|
|
int i, col, start, end, base_index;
|
|
int error_found = 0;
|
|
|
|
if (nnz != (csrRowPtr[m] - csrRowPtr[0])) {
|
|
fprintf(stderr,
|
|
"Error (nnz check failed): (csrRowPtr[%d]=%d - csrRowPtr[%d]=%d) "
|
|
"!= (nnz=%d)\n",
|
|
0, csrRowPtr[0], m, csrRowPtr[m], nnz);
|
|
error_found = 1;
|
|
}
|
|
|
|
base_index = csrRowPtr[0];
|
|
if ((0 != base_index) && (1 != base_index)) {
|
|
fprintf(stderr, "Error (base index check failed): base index = %d\n",
|
|
base_index);
|
|
error_found = 1;
|
|
}
|
|
|
|
for (i = 0; (!error_found) && (i < m); i++) {
|
|
start = csrRowPtr[i] - base_index;
|
|
end = csrRowPtr[i + 1] - base_index;
|
|
if (start > end) {
|
|
fprintf(
|
|
stderr,
|
|
"Error (corrupted row): csrRowPtr[%d] (=%d) > csrRowPtr[%d] (=%d)\n",
|
|
i, start + base_index, i + 1, end + base_index);
|
|
error_found = 1;
|
|
}
|
|
for (col = start; col < end; col++) {
|
|
if (csrColInd[col] < base_index) {
|
|
fprintf(
|
|
stderr,
|
|
"Error (column vs. base index check failed): csrColInd[%d] < %d\n",
|
|
col, base_index);
|
|
error_found = 1;
|
|
}
|
|
if ((col < (end - 1)) && (csrColInd[col] >= csrColInd[col + 1])) {
|
|
fprintf(stderr,
|
|
"Error (sorting of the column indecis check failed): "
|
|
"(csrColInd[%d]=%d) >= (csrColInd[%d]=%d)\n",
|
|
col, csrColInd[col], col + 1, csrColInd[col + 1]);
|
|
error_found = 1;
|
|
}
|
|
}
|
|
}
|
|
return error_found;
|
|
}
|
|
|
|
template <typename T_ELEM>
|
|
int loadMMSparseMatrix(char *filename, char elem_type, bool csrFormat, int *m,
|
|
int *n, int *nnz, T_ELEM **aVal, int **aRowInd,
|
|
int **aColInd, int extendSymMatrix) {
|
|
MM_typecode matcode;
|
|
double *tempVal;
|
|
int *tempRowInd, *tempColInd;
|
|
double *tval;
|
|
int *trow, *tcol;
|
|
int *csrRowPtr, *cscColPtr;
|
|
int i, j, error, base, count;
|
|
struct cooFormat *work;
|
|
|
|
/* read the matrix */
|
|
error = mm_read_mtx_crd(filename, m, n, nnz, &trow, &tcol, &tval, &matcode);
|
|
if (error) {
|
|
fprintf(stderr, "!!!! can not open file: '%s'\n", filename);
|
|
return 1;
|
|
}
|
|
|
|
/* start error checking */
|
|
if (mm_is_complex(matcode) && ((elem_type != 'z') && (elem_type != 'c'))) {
|
|
fprintf(stderr, "!!!! complex matrix requires type 'z' or 'c'\n");
|
|
return 1;
|
|
}
|
|
|
|
if (mm_is_dense(matcode) || mm_is_array(matcode) ||
|
|
mm_is_pattern(matcode) /*|| mm_is_integer(matcode)*/) {
|
|
fprintf(
|
|
stderr,
|
|
"!!!! dense, array, pattern and integer matrices are not supported\n");
|
|
return 1;
|
|
}
|
|
|
|
/* if necessary symmetrize the pattern (transform from triangular to full) */
|
|
if ((extendSymMatrix) && (mm_is_symmetric(matcode) ||
|
|
mm_is_hermitian(matcode) || mm_is_skew(matcode))) {
|
|
// count number of non-diagonal elements
|
|
count = 0;
|
|
for (i = 0; i < (*nnz); i++) {
|
|
if (trow[i] != tcol[i]) {
|
|
count++;
|
|
}
|
|
}
|
|
// allocate space for the symmetrized matrix
|
|
tempRowInd = (int *)malloc((*nnz + count) * sizeof(int));
|
|
tempColInd = (int *)malloc((*nnz + count) * sizeof(int));
|
|
if (mm_is_real(matcode) || mm_is_integer(matcode)) {
|
|
tempVal = (double *)malloc((*nnz + count) * sizeof(double));
|
|
} else {
|
|
tempVal = (double *)malloc(2 * (*nnz + count) * sizeof(double));
|
|
}
|
|
// copy the elements regular and transposed locations
|
|
for (j = 0, i = 0; i < (*nnz); i++) {
|
|
tempRowInd[j] = trow[i];
|
|
tempColInd[j] = tcol[i];
|
|
if (mm_is_real(matcode) || mm_is_integer(matcode)) {
|
|
tempVal[j] = tval[i];
|
|
} else {
|
|
tempVal[2 * j] = tval[2 * i];
|
|
tempVal[2 * j + 1] = tval[2 * i + 1];
|
|
}
|
|
j++;
|
|
if (trow[i] != tcol[i]) {
|
|
tempRowInd[j] = tcol[i];
|
|
tempColInd[j] = trow[i];
|
|
if (mm_is_real(matcode) || mm_is_integer(matcode)) {
|
|
if (mm_is_skew(matcode)) {
|
|
tempVal[j] = -tval[i];
|
|
} else {
|
|
tempVal[j] = tval[i];
|
|
}
|
|
} else {
|
|
if (mm_is_hermitian(matcode)) {
|
|
tempVal[2 * j] = tval[2 * i];
|
|
tempVal[2 * j + 1] = -tval[2 * i + 1];
|
|
} else {
|
|
tempVal[2 * j] = tval[2 * i];
|
|
tempVal[2 * j + 1] = tval[2 * i + 1];
|
|
}
|
|
}
|
|
j++;
|
|
}
|
|
}
|
|
(*nnz) += count;
|
|
// free temporary storage
|
|
free(trow);
|
|
free(tcol);
|
|
free(tval);
|
|
} else {
|
|
tempRowInd = trow;
|
|
tempColInd = tcol;
|
|
tempVal = tval;
|
|
}
|
|
// life time of (trow, tcol, tval) is over.
|
|
// please use COO format (tempRowInd, tempColInd, tempVal)
|
|
|
|
// use qsort to sort COO format
|
|
work = (struct cooFormat *)malloc(sizeof(struct cooFormat) * (*nnz));
|
|
if (NULL == work) {
|
|
fprintf(stderr, "!!!! allocation error, malloc failed\n");
|
|
return 1;
|
|
}
|
|
for (i = 0; i < (*nnz); i++) {
|
|
work[i].i = tempRowInd[i];
|
|
work[i].j = tempColInd[i];
|
|
work[i].p = i; // permutation is identity
|
|
}
|
|
|
|
if (csrFormat) {
|
|
/* create row-major ordering of indices (sorted by row and within each row
|
|
* by column) */
|
|
qsort(work, *nnz, sizeof(struct cooFormat), (FUNPTR)fptr_array[0]);
|
|
} else {
|
|
/* create column-major ordering of indices (sorted by column and within each
|
|
* column by row) */
|
|
qsort(work, *nnz, sizeof(struct cooFormat), (FUNPTR)fptr_array[1]);
|
|
}
|
|
|
|
// (tempRowInd, tempColInd) is sorted either by row-major or by col-major
|
|
for (i = 0; i < (*nnz); i++) {
|
|
tempRowInd[i] = work[i].i;
|
|
tempColInd[i] = work[i].j;
|
|
}
|
|
|
|
// setup base
|
|
// check if there is any row/col 0, if so base-0
|
|
// check if there is any row/col equal to matrix dimension m/n, if so base-1
|
|
int base0 = 0;
|
|
int base1 = 0;
|
|
for (i = 0; i < (*nnz); i++) {
|
|
const int row = tempRowInd[i];
|
|
const int col = tempColInd[i];
|
|
if ((0 == row) || (0 == col)) {
|
|
base0 = 1;
|
|
}
|
|
if ((*m == row) || (*n == col)) {
|
|
base1 = 1;
|
|
}
|
|
}
|
|
if (base0 && base1) {
|
|
printf("Error: input matrix is base-0 and base-1 \n");
|
|
return 1;
|
|
}
|
|
|
|
base = 0;
|
|
if (base1) {
|
|
base = 1;
|
|
}
|
|
|
|
/* compress the appropriate indices */
|
|
if (csrFormat) {
|
|
/* CSR format (assuming row-major format) */
|
|
csrRowPtr = (int *)malloc(((*m) + 1) * sizeof(csrRowPtr[0]));
|
|
if (!csrRowPtr) return 1;
|
|
compress_index(tempRowInd, *nnz, *m, csrRowPtr, base);
|
|
|
|
*aRowInd = csrRowPtr;
|
|
*aColInd = (int *)malloc((*nnz) * sizeof(int));
|
|
} else {
|
|
/* CSC format (assuming column-major format) */
|
|
cscColPtr = (int *)malloc(((*n) + 1) * sizeof(cscColPtr[0]));
|
|
if (!cscColPtr) return 1;
|
|
compress_index(tempColInd, *nnz, *n, cscColPtr, base);
|
|
|
|
*aColInd = cscColPtr;
|
|
*aRowInd = (int *)malloc((*nnz) * sizeof(int));
|
|
}
|
|
|
|
/* transfrom the matrix values of type double into one of the cusparse library
|
|
* types */
|
|
*aVal = (T_ELEM *)malloc((*nnz) * sizeof(T_ELEM));
|
|
|
|
for (i = 0; i < (*nnz); i++) {
|
|
if (csrFormat) {
|
|
(*aColInd)[i] = tempColInd[i];
|
|
} else {
|
|
(*aRowInd)[i] = tempRowInd[i];
|
|
}
|
|
if (mm_is_real(matcode) || mm_is_integer(matcode)) {
|
|
(*aVal)[i] = cuGet<T_ELEM>(tempVal[work[i].p]);
|
|
} else {
|
|
(*aVal)[i] =
|
|
cuGet<T_ELEM>(tempVal[2 * work[i].p], tempVal[2 * work[i].p + 1]);
|
|
}
|
|
}
|
|
|
|
/* check for corruption */
|
|
int error_found;
|
|
if (csrFormat) {
|
|
error_found = verify_pattern(*m, *nnz, *aRowInd, *aColInd);
|
|
} else {
|
|
error_found = verify_pattern(*n, *nnz, *aColInd, *aRowInd);
|
|
}
|
|
if (error_found) {
|
|
fprintf(stderr, "!!!! verify_pattern failed\n");
|
|
return 1;
|
|
}
|
|
|
|
/* cleanup and exit */
|
|
free(work);
|
|
free(tempVal);
|
|
free(tempColInd);
|
|
free(tempRowInd);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* specific instantiation */
|
|
template int loadMMSparseMatrix<float>(char *filename, char elem_type,
|
|
bool csrFormat, int *m, int *n, int *nnz,
|
|
float **aVal, int **aRowInd,
|
|
int **aColInd, int extendSymMatrix);
|
|
|
|
template int loadMMSparseMatrix<double>(char *filename, char elem_type,
|
|
bool csrFormat, int *m, int *n,
|
|
int *nnz, double **aVal, int **aRowInd,
|
|
int **aColInd, int extendSymMatrix);
|
|
|
|
template int loadMMSparseMatrix<cuComplex>(char *filename, char elem_type,
|
|
bool csrFormat, int *m, int *n,
|
|
int *nnz, cuComplex **aVal,
|
|
int **aRowInd, int **aColInd,
|
|
int extendSymMatrix);
|
|
|
|
template int loadMMSparseMatrix<cuDoubleComplex>(
|
|
char *filename, char elem_type, bool csrFormat, int *m, int *n, int *nnz,
|
|
cuDoubleComplex **aVal, int **aRowInd, int **aColInd, int extendSymMatrix);
|