/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include "mmio.h" /* avoid Windows warnings (for example: strcpy, fscanf, etc.) */ #if defined(_WIN32) #define _CRT_SECURE_NO_WARNINGS #endif /* various __inline__ __device__ function to initialize a T_ELEM */ template __inline__ T_ELEM cuGet(int); template <> __inline__ float cuGet(int x) { return float(x); } template <> __inline__ double cuGet(int x) { return double(x); } template <> __inline__ cuComplex cuGet(int x) { return (make_cuComplex(float(x), 0.0f)); } template <> __inline__ cuDoubleComplex cuGet(int x) { return (make_cuDoubleComplex(double(x), 0.0)); } template __inline__ T_ELEM cuGet(int, int); template <> __inline__ float cuGet(int x, int y) { return float(x); } template <> __inline__ double cuGet(int x, int y) { return double(x); } template <> __inline__ cuComplex cuGet(int x, int y) { return make_cuComplex(float(x), float(y)); } template <> __inline__ cuDoubleComplex cuGet(int x, int y) { return (make_cuDoubleComplex(double(x), double(y))); } template __inline__ T_ELEM cuGet(float); template <> __inline__ float cuGet(float x) { return float(x); } template <> __inline__ double cuGet(float x) { return double(x); } template <> __inline__ cuComplex cuGet(float x) { return (make_cuComplex(float(x), 0.0f)); } template <> __inline__ cuDoubleComplex cuGet(float x) { return (make_cuDoubleComplex(double(x), 0.0)); } template __inline__ T_ELEM cuGet(float, float); template <> __inline__ float cuGet(float x, float y) { return float(x); } template <> __inline__ double cuGet(float x, float y) { return double(x); } template <> __inline__ cuComplex cuGet(float x, float y) { return (make_cuComplex(float(x), float(y))); } template <> __inline__ cuDoubleComplex cuGet(float x, float y) { return (make_cuDoubleComplex(double(x), double(y))); } template __inline__ T_ELEM cuGet(double); template <> __inline__ float cuGet(double x) { return float(x); } template <> __inline__ double cuGet(double x) { return double(x); } template <> __inline__ cuComplex cuGet(double x) { return (make_cuComplex(float(x), 0.0f)); } template <> __inline__ cuDoubleComplex cuGet(double x) { return (make_cuDoubleComplex(double(x), 0.0)); } template __inline__ T_ELEM cuGet(double, double); template <> __inline__ float cuGet(double x, double y) { return float(x); } template <> __inline__ double cuGet(double x, double y) { return double(x); } template <> __inline__ cuComplex cuGet(double x, double y) { return (make_cuComplex(float(x), float(y))); } template <> __inline__ cuDoubleComplex cuGet(double x, double y) { return (make_cuDoubleComplex(double(x), double(y))); } static void compress_index(const int *Ind, int nnz, int m, int *Ptr, int base) { int i; /* initialize everything to zero */ for (i = 0; i < m + 1; i++) { Ptr[i] = 0; } /* count elements in every row */ Ptr[0] = base; for (i = 0; i < nnz; i++) { Ptr[Ind[i] + (1 - base)]++; } /* add all the values */ for (i = 0; i < m; i++) { Ptr[i + 1] += Ptr[i]; } } struct cooFormat { int i; int j; int p; // permutation }; int cmp_cooFormat_csr(struct cooFormat *s, struct cooFormat *t) { if (s->i < t->i) { return -1; } else if (s->i > t->i) { return 1; } else { return s->j - t->j; } } int cmp_cooFormat_csc(struct cooFormat *s, struct cooFormat *t) { if (s->j < t->j) { return -1; } else if (s->j > t->j) { return 1; } else { return s->i - t->i; } } typedef int (*FUNPTR)(const void *, const void *); typedef int (*FUNPTR2)(struct cooFormat *s, struct cooFormat *t); static FUNPTR2 fptr_array[2] = { cmp_cooFormat_csr, cmp_cooFormat_csc, }; static int verify_pattern(int m, int nnz, int *csrRowPtr, int *csrColInd) { int i, col, start, end, base_index; int error_found = 0; if (nnz != (csrRowPtr[m] - csrRowPtr[0])) { fprintf(stderr, "Error (nnz check failed): (csrRowPtr[%d]=%d - csrRowPtr[%d]=%d) " "!= (nnz=%d)\n", 0, csrRowPtr[0], m, csrRowPtr[m], nnz); error_found = 1; } base_index = csrRowPtr[0]; if ((0 != base_index) && (1 != base_index)) { fprintf(stderr, "Error (base index check failed): base index = %d\n", base_index); error_found = 1; } for (i = 0; (!error_found) && (i < m); i++) { start = csrRowPtr[i] - base_index; end = csrRowPtr[i + 1] - base_index; if (start > end) { fprintf( stderr, "Error (corrupted row): csrRowPtr[%d] (=%d) > csrRowPtr[%d] (=%d)\n", i, start + base_index, i + 1, end + base_index); error_found = 1; } for (col = start; col < end; col++) { if (csrColInd[col] < base_index) { fprintf( stderr, "Error (column vs. base index check failed): csrColInd[%d] < %d\n", col, base_index); error_found = 1; } if ((col < (end - 1)) && (csrColInd[col] >= csrColInd[col + 1])) { fprintf(stderr, "Error (sorting of the column indecis check failed): " "(csrColInd[%d]=%d) >= (csrColInd[%d]=%d)\n", col, csrColInd[col], col + 1, csrColInd[col + 1]); error_found = 1; } } } return error_found; } template int loadMMSparseMatrix(char *filename, char elem_type, bool csrFormat, int *m, int *n, int *nnz, T_ELEM **aVal, int **aRowInd, int **aColInd, int extendSymMatrix) { MM_typecode matcode; double *tempVal; int *tempRowInd, *tempColInd; double *tval; int *trow, *tcol; int *csrRowPtr, *cscColPtr; int i, j, error, base, count; struct cooFormat *work; /* read the matrix */ error = mm_read_mtx_crd(filename, m, n, nnz, &trow, &tcol, &tval, &matcode); if (error) { fprintf(stderr, "!!!! can not open file: '%s'\n", filename); return 1; } /* start error checking */ if (mm_is_complex(matcode) && ((elem_type != 'z') && (elem_type != 'c'))) { fprintf(stderr, "!!!! complex matrix requires type 'z' or 'c'\n"); return 1; } if (mm_is_dense(matcode) || mm_is_array(matcode) || mm_is_pattern(matcode) /*|| mm_is_integer(matcode)*/) { fprintf( stderr, "!!!! dense, array, pattern and integer matrices are not supported\n"); return 1; } /* if necessary symmetrize the pattern (transform from triangular to full) */ if ((extendSymMatrix) && (mm_is_symmetric(matcode) || mm_is_hermitian(matcode) || mm_is_skew(matcode))) { // count number of non-diagonal elements count = 0; for (i = 0; i < (*nnz); i++) { if (trow[i] != tcol[i]) { count++; } } // allocate space for the symmetrized matrix tempRowInd = (int *)malloc((*nnz + count) * sizeof(int)); tempColInd = (int *)malloc((*nnz + count) * sizeof(int)); if (mm_is_real(matcode) || mm_is_integer(matcode)) { tempVal = (double *)malloc((*nnz + count) * sizeof(double)); } else { tempVal = (double *)malloc(2 * (*nnz + count) * sizeof(double)); } // copy the elements regular and transposed locations for (j = 0, i = 0; i < (*nnz); i++) { tempRowInd[j] = trow[i]; tempColInd[j] = tcol[i]; if (mm_is_real(matcode) || mm_is_integer(matcode)) { tempVal[j] = tval[i]; } else { tempVal[2 * j] = tval[2 * i]; tempVal[2 * j + 1] = tval[2 * i + 1]; } j++; if (trow[i] != tcol[i]) { tempRowInd[j] = tcol[i]; tempColInd[j] = trow[i]; if (mm_is_real(matcode) || mm_is_integer(matcode)) { if (mm_is_skew(matcode)) { tempVal[j] = -tval[i]; } else { tempVal[j] = tval[i]; } } else { if (mm_is_hermitian(matcode)) { tempVal[2 * j] = tval[2 * i]; tempVal[2 * j + 1] = -tval[2 * i + 1]; } else { tempVal[2 * j] = tval[2 * i]; tempVal[2 * j + 1] = tval[2 * i + 1]; } } j++; } } (*nnz) += count; // free temporary storage free(trow); free(tcol); free(tval); } else { tempRowInd = trow; tempColInd = tcol; tempVal = tval; } // life time of (trow, tcol, tval) is over. // please use COO format (tempRowInd, tempColInd, tempVal) // use qsort to sort COO format work = (struct cooFormat *)malloc(sizeof(struct cooFormat) * (*nnz)); if (NULL == work) { fprintf(stderr, "!!!! allocation error, malloc failed\n"); return 1; } for (i = 0; i < (*nnz); i++) { work[i].i = tempRowInd[i]; work[i].j = tempColInd[i]; work[i].p = i; // permutation is identity } if (csrFormat) { /* create row-major ordering of indices (sorted by row and within each row * by column) */ qsort(work, *nnz, sizeof(struct cooFormat), (FUNPTR)fptr_array[0]); } else { /* create column-major ordering of indices (sorted by column and within each * column by row) */ qsort(work, *nnz, sizeof(struct cooFormat), (FUNPTR)fptr_array[1]); } // (tempRowInd, tempColInd) is sorted either by row-major or by col-major for (i = 0; i < (*nnz); i++) { tempRowInd[i] = work[i].i; tempColInd[i] = work[i].j; } // setup base // check if there is any row/col 0, if so base-0 // check if there is any row/col equal to matrix dimension m/n, if so base-1 int base0 = 0; int base1 = 0; for (i = 0; i < (*nnz); i++) { const int row = tempRowInd[i]; const int col = tempColInd[i]; if ((0 == row) || (0 == col)) { base0 = 1; } if ((*m == row) || (*n == col)) { base1 = 1; } } if (base0 && base1) { printf("Error: input matrix is base-0 and base-1 \n"); return 1; } base = 0; if (base1) { base = 1; } /* compress the appropriate indices */ if (csrFormat) { /* CSR format (assuming row-major format) */ csrRowPtr = (int *)malloc(((*m) + 1) * sizeof(csrRowPtr[0])); if (!csrRowPtr) return 1; compress_index(tempRowInd, *nnz, *m, csrRowPtr, base); *aRowInd = csrRowPtr; *aColInd = (int *)malloc((*nnz) * sizeof(int)); } else { /* CSC format (assuming column-major format) */ cscColPtr = (int *)malloc(((*n) + 1) * sizeof(cscColPtr[0])); if (!cscColPtr) return 1; compress_index(tempColInd, *nnz, *n, cscColPtr, base); *aColInd = cscColPtr; *aRowInd = (int *)malloc((*nnz) * sizeof(int)); } /* transfrom the matrix values of type double into one of the cusparse library * types */ *aVal = (T_ELEM *)malloc((*nnz) * sizeof(T_ELEM)); for (i = 0; i < (*nnz); i++) { if (csrFormat) { (*aColInd)[i] = tempColInd[i]; } else { (*aRowInd)[i] = tempRowInd[i]; } if (mm_is_real(matcode) || mm_is_integer(matcode)) { (*aVal)[i] = cuGet(tempVal[work[i].p]); } else { (*aVal)[i] = cuGet(tempVal[2 * work[i].p], tempVal[2 * work[i].p + 1]); } } /* check for corruption */ int error_found; if (csrFormat) { error_found = verify_pattern(*m, *nnz, *aRowInd, *aColInd); } else { error_found = verify_pattern(*n, *nnz, *aColInd, *aRowInd); } if (error_found) { fprintf(stderr, "!!!! verify_pattern failed\n"); return 1; } /* cleanup and exit */ free(work); free(tempVal); free(tempColInd); free(tempRowInd); return 0; } /* specific instantiation */ template int loadMMSparseMatrix(char *filename, char elem_type, bool csrFormat, int *m, int *n, int *nnz, float **aVal, int **aRowInd, int **aColInd, int extendSymMatrix); template int loadMMSparseMatrix(char *filename, char elem_type, bool csrFormat, int *m, int *n, int *nnz, double **aVal, int **aRowInd, int **aColInd, int extendSymMatrix); template int loadMMSparseMatrix(char *filename, char elem_type, bool csrFormat, int *m, int *n, int *nnz, cuComplex **aVal, int **aRowInd, int **aColInd, int extendSymMatrix); template int loadMMSparseMatrix( char *filename, char elem_type, bool csrFormat, int *m, int *n, int *nnz, cuDoubleComplex **aVal, int **aRowInd, int **aColInd, int extendSymMatrix);