From a384efcecfb183e70c213bc285fbee12d529d829 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Tue, 27 Nov 2018 11:57:25 +0300 Subject: [PATCH] [HIPIFY][SPARSE] Level 3 functions + cuSPARSE_03 test + update CUSPARSE_API_supported_by_HIP.md --- .../markdown/CUSPARSE_API_supported_by_HIP.md | 56 +++++ .../src/CUDA2HIP_SPARSE_API_functions.cpp | 65 +++++ .../hipify-clang/cuSPARSE/cuSPARSE_01.cu | 4 +- .../hipify-clang/cuSPARSE/cuSPARSE_02.cu | 2 +- .../hipify-clang/cuSPARSE/cuSPARSE_03.cu | 229 ++++++++++++++++++ 5 files changed, 353 insertions(+), 3 deletions(-) create mode 100644 hipamd/tests/hipify-clang/cuSPARSE/cuSPARSE_03.cu diff --git a/hipamd/docs/markdown/CUSPARSE_API_supported_by_HIP.md b/hipamd/docs/markdown/CUSPARSE_API_supported_by_HIP.md index d3707573a5..e871de927f 100644 --- a/hipamd/docs/markdown/CUSPARSE_API_supported_by_HIP.md +++ b/hipamd/docs/markdown/CUSPARSE_API_supported_by_HIP.md @@ -236,3 +236,59 @@ |`cusparseDhybsv_solve` | | |`cusparseChybsv_solve` | | |`cusparseZhybsv_solve` | | + +## **5.cuSPARSE Level 3 Function Reference** + +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------------------------| +|`cusparseScsrmm` |`hipsparseScsrmm` | +|`cusparseDcsrmm` |`hipsparseDcsrmm` | +|`cusparseCcsrmm` | | +|`cusparseZcsrmm` | | +|`cusparseScsrmm2` |`hipsparseScsrmm2` | +|`cusparseDcsrmm2` |`hipsparseDcsrmm2` | +|`cusparseCcsrmm2` | | +|`cusparseZcsrmm2` | | +|`cusparseScsrsm_analysis` | | +|`cusparseDcsrsm_analysis` | | +|`cusparseCcsrsm_analysis` | | +|`cusparseZcsrsm_analysis` | | +|`cusparseScsrsm_solve` | | +|`cusparseDcsrsm_solve` | | +|`cusparseCcsrsm_solve` | | +|`cusparseZcsrsm_solve` | | +|`cusparseScsrsm2_bufferSizeExt` | | +|`cusparseDcsrsm2_bufferSizeExt` | | +|`cusparseCcsrsm2_bufferSizeExt` | | +|`cusparseZcsrsm2_bufferSizeExt` | | +|`cusparseScsrsm2_analysis` | | +|`cusparseDcsrsm2_analysis` | | +|`cusparseCcsrsm2_analysis` | | +|`cusparseZcsrsm2_analysis` | | +|`cusparseScsrsm2_solve` | | +|`cusparseDcsrsm2_solve` | | +|`cusparseCcsrsm2_solve` | | +|`cusparseZcsrsm2_solve` | | +|`cusparseXcsrsm2_zeroPivot` | | +|`cusparseSbsrmm` | | +|`cusparseDbsrmm` | | +|`cusparseCbsrmm` | | +|`cusparseZbsrmm` | | +|`cusparseSbsrsm2_bufferSize` | | +|`cusparseDbsrsm2_bufferSize` | | +|`cusparseCbsrsm2_bufferSize` | | +|`cusparseZbsrsm2_bufferSize` | | +|`cusparseSbsrsm2_analysis` | | +|`cusparseDbsrsm2_analysis` | | +|`cusparseCbsrsm2_analysis` | | +|`cusparseZbsrsm2_analysis` | | +|`cusparseSbsrsm2_solve` | | +|`cusparseDbsrsm2_solve` | | +|`cusparseCbsrsm2_solve` | | +|`cusparseZbsrsm2_solve` | | +|`cusparseXbsrsm2_zeroPivot` | | +|`cusparseSgemmi` | | +|`cusparseDgemmi` | | +|`cusparseCgemmi` | | +|`cusparseZgemmi` | | + diff --git a/hipamd/hipify-clang/src/CUDA2HIP_SPARSE_API_functions.cpp b/hipamd/hipify-clang/src/CUDA2HIP_SPARSE_API_functions.cpp index 429c8d0ea2..1ea1365c19 100644 --- a/hipamd/hipify-clang/src/CUDA2HIP_SPARSE_API_functions.cpp +++ b/hipamd/hipify-clang/src/CUDA2HIP_SPARSE_API_functions.cpp @@ -174,4 +174,69 @@ const std::map CUDA_SPARSE_FUNCTION_MAP{ {"cusparseDhybsv_solve", {"hipsparseDhybsv_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, {"cusparseChybsv_solve", {"hipsparseChybsv_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, {"cusparseZhybsv_solve", {"hipsparseZhybsv_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + // 8. cuSPARSE Level 3 Function Reference + {"cusparseScsrmm", {"hipsparseScsrmm", CONV_LIB_FUNC, API_SPARSE}}, + {"cusparseDcsrmm", {"hipsparseDcsrmm", CONV_LIB_FUNC, API_SPARSE}}, + {"cusparseCcsrmm", {"hipsparseCcsrmm", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZcsrmm", {"hipsparseZcsrmm", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseScsrmm2", {"hipsparseScsrmm2", CONV_LIB_FUNC, API_SPARSE}}, + {"cusparseDcsrmm2", {"hipsparseDcsrmm2", CONV_LIB_FUNC, API_SPARSE}}, + {"cusparseCcsrmm2", {"hipsparseCcsrmm2", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZcsrmm2", {"hipsparseZcsrmm2", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseScsrsm_analysis", {"hipsparseScsrsm_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDcsrsm_analysis", {"hipsparseDcsrsm_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCcsrsm_analysis", {"hipsparseCcsrsm_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZcsrsm_analysis", {"hipsparseZcsrsm_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseScsrsm_solve", {"hipsparseScsrsm_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDcsrsm_solve", {"hipsparseDcsrsm_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCcsrsm_solve", {"hipsparseCcsrsm_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZcsrsm_solve", {"hipsparseZcsrsm_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseScsrsm2_bufferSizeExt", {"hipsparseScsrsm2_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDcsrsm2_bufferSizeExt", {"hipsparseDcsrsm2_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCcsrsm2_bufferSizeExt", {"hipsparseCcsrsm2_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZcsrsm2_bufferSizeExt", {"hipsparseZcsrsm2_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseScsrsm2_analysis", {"hipsparseScsrsm2_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDcsrsm2_analysis", {"hipsparseDcsrsm2_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCcsrsm2_analysis", {"hipsparseCcsrsm2_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZcsrsm2_analysis", {"hipsparseZcsrsm2_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseScsrsm2_solve", {"hipsparseScsrsm2_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDcsrsm2_solve", {"hipsparseDcsrsm2_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCcsrsm2_solve", {"hipsparseCcsrsm2_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZcsrsm2_solve", {"hipsparseZcsrsm2_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseXcsrsm2_zeroPivot", {"hipsparseXcsrsm2_zeroPivot", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseSbsrmm", {"hipsparseSbsrmm", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDbsrmm", {"hipsparseDbsrmm", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCbsrmm", {"hipsparseCbsrmm", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZbsrmm", {"hipsparseZbsrmm", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseSbsrsm2_bufferSize", {"hipsparseCbsrsm2_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDbsrsm2_bufferSize", {"hipsparseDbsrsm2_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCbsrsm2_bufferSize", {"hipsparseCbsrsm2_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZbsrsm2_bufferSize", {"hipsparseZbsrsm2_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseSbsrsm2_analysis", {"hipsparseSbsrsm2_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDbsrsm2_analysis", {"hipsparseDbsrsm2_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCbsrsm2_analysis", {"hipsparseCbsrsm2_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZbsrsm2_analysis", {"hipsparseZbsrsm2_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseSbsrsm2_solve", {"hipsparseSbsrsm2_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDbsrsm2_solve", {"hipsparseDbsrsm2_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCbsrsm2_solve", {"hipsparseCbsrsm2_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZbsrsm2_solve", {"hipsparseZbsrsm2_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseXbsrsm2_zeroPivot", {"hipsparseXbsrsm2_zeroPivot", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseSgemmi", {"hipsparseSgemmi", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDgemmi", {"hipsparseDgemmi", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCgemmi", {"hipsparseCgemmi", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZgemmi", {"hipsparseZgemmi", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, }; diff --git a/hipamd/tests/hipify-clang/cuSPARSE/cuSPARSE_01.cu b/hipamd/tests/hipify-clang/cuSPARSE/cuSPARSE_01.cu index 5ef7c188ee..df2499c041 100644 --- a/hipamd/tests/hipify-clang/cuSPARSE/cuSPARSE_01.cu +++ b/hipamd/tests/hipify-clang/cuSPARSE/cuSPARSE_01.cu @@ -261,7 +261,7 @@ int main(){ return 2; } /* exercise Level 1 routines (scatter vector elements) */ - // TODO: status= hipsparseDsctr(handle, nnz_vector, xVal, xInd, + // CHECK: status= hipsparseDsctr(handle, nnz_vector, xVal, xInd, // CHECK: &y[n], HIPSPARSE_INDEX_BASE_ZERO); status= cusparseDsctr(handle, nnz_vector, xVal, xInd, &y[n], CUSPARSE_INDEX_BASE_ZERO); @@ -299,7 +299,7 @@ int main(){ CLEANUP("Memset on Device failed"); return 1; } - // TODO: status= hipsparseDcsrmm(handle, HIPSPARSE_OPERATION_NON_TRANSPOSE, n, 2, n, + // CHECK: status= hipsparseDcsrmm(handle, HIPSPARSE_OPERATION_NON_TRANSPOSE, n, 2, n, status= cusparseDcsrmm(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, n, 2, n, nnz, &dfive, descr, cooVal, csrRowPtr, cooColIndex, y, n, &dzero, z, n+1); diff --git a/hipamd/tests/hipify-clang/cuSPARSE/cuSPARSE_02.cu b/hipamd/tests/hipify-clang/cuSPARSE/cuSPARSE_02.cu index ab2defefe7..de7629367f 100644 --- a/hipamd/tests/hipify-clang/cuSPARSE/cuSPARSE_02.cu +++ b/hipamd/tests/hipify-clang/cuSPARSE/cuSPARSE_02.cu @@ -116,7 +116,7 @@ int main(int argc, char*argv[]) assert(CUSPARSE_STATUS_SUCCESS == cusparseStat); // step 2: configuration of matrix A - // cusparseStat = hipsparseCreateMatDescr(&descrA); + // CHECK: cusparseStat = hipsparseCreateMatDescr(&descrA); cusparseStat = cusparseCreateMatDescr(&descrA); // assert(HIPSPARSE_STATUS_SUCCESS == cusparseStat); assert(CUSPARSE_STATUS_SUCCESS == cusparseStat); diff --git a/hipamd/tests/hipify-clang/cuSPARSE/cuSPARSE_03.cu b/hipamd/tests/hipify-clang/cuSPARSE/cuSPARSE_03.cu new file mode 100644 index 0000000000..ef52072576 --- /dev/null +++ b/hipamd/tests/hipify-clang/cuSPARSE/cuSPARSE_03.cu @@ -0,0 +1,229 @@ +// RUN: %run_test hipify "%s" "%t" %cuda_args +#include +#include +#include +// CHECK: #include +#include +// CHECK: #include "hipsparse.h" +#include "cusparse.h" + +int main(int argc, char*argv[]) +{ + // CHECK: hipsparseHandle_t handle = NULL; + cusparseHandle_t handle = NULL; + // CHECK: hipStream_t stream = NULL; + cudaStream_t stream = NULL; + // CHECK: hipsparseStatus_t status = HIPSPARSE_STATUS_SUCCESS; + cusparseStatus_t status = CUSPARSE_STATUS_SUCCESS; + // CHECK: hipError_t cudaStat1 = hipSuccess; + // CHECK: hipError_t cudaStat2 = hipSuccess; + // CHECK: hipError_t cudaStat3 = hipSuccess; + // CHECK: hipError_t cudaStat4 = hipSuccess; + // CHECK: hipError_t cudaStat5 = hipSuccess; + // CHECK: hipError_t cudaStat6 = hipSuccess; + cudaError_t cudaStat1 = cudaSuccess; + cudaError_t cudaStat2 = cudaSuccess; + cudaError_t cudaStat3 = cudaSuccess; + cudaError_t cudaStat4 = cudaSuccess; + cudaError_t cudaStat5 = cudaSuccess; + cudaError_t cudaStat6 = cudaSuccess; + + /* + * A is a 3x3 sparse matrix + * | 1 2 0 | + * A = | 0 5 0 | + * | 0 8 0 | + */ + const int m = 3; + const int n = 3; + const int nnz = 4; + +#if 0 + /* index starts at 0 */ + int h_cooRows[nnz] = { 2, 1, 0, 0 }; + int h_cooCols[nnz] = { 1, 1, 0, 1 }; +#else + /* index starts at -2 */ + int h_cooRows[nnz] = { 0, -1, -2, -2 }; + int h_cooCols[nnz] = { -1, -1, -2, -1 }; +#endif + double h_cooVals[nnz] = { 8.0, 5.0, 1.0, 2.0 }; + int h_P[nnz]; + + int *d_cooRows = NULL; + int *d_cooCols = NULL; + int *d_P = NULL; + double *d_cooVals = NULL; + double *d_cooVals_sorted = NULL; + size_t pBufferSizeInBytes = 0; + void *pBuffer = NULL; + + printf("m = %d, n = %d, nnz=%d \n", m, n, nnz); + + /* step 1: create cusparse handle, bind a stream */ + // CHECK: cudaStat1 = hipStreamCreateWithFlags(&stream, hipStreamNonBlocking); + cudaStat1 = cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + // CHECK: status = hipsparseCreate(&handle); + status = cusparseCreate(&handle); + // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); + assert(CUSPARSE_STATUS_SUCCESS == status); + // CHECK: status = hipsparseSetStream(handle, stream); + status = cusparseSetStream(handle, stream); + // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); + assert(CUSPARSE_STATUS_SUCCESS == status); + + /* step 2: allocate buffer */ + // TODO: status = hipsparseXcoosort_bufferSizeExt( + status = cusparseXcoosort_bufferSizeExt( + handle, + m, + n, + nnz, + d_cooRows, + d_cooCols, + &pBufferSizeInBytes + ); + // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); + assert(CUSPARSE_STATUS_SUCCESS == status); + + printf("pBufferSizeInBytes = %lld bytes \n", (long long)pBufferSizeInBytes); + + // CHECK: cudaStat1 = hipMalloc(&d_cooRows, sizeof(int)*nnz); + cudaStat1 = cudaMalloc(&d_cooRows, sizeof(int)*nnz); + // CHECK: cudaStat2 = hipMalloc(&d_cooCols, sizeof(int)*nnz); + cudaStat2 = cudaMalloc(&d_cooCols, sizeof(int)*nnz); + // CHECK: cudaStat3 = hipMalloc(&d_P, sizeof(int)*nnz); + cudaStat3 = cudaMalloc(&d_P, sizeof(int)*nnz); + // CHECK: cudaStat4 = hipMalloc(&d_cooVals, sizeof(double)*nnz); + cudaStat4 = cudaMalloc(&d_cooVals, sizeof(double)*nnz); + // CHECK: cudaStat5 = hipMalloc(&d_cooVals_sorted, sizeof(double)*nnz); + cudaStat5 = cudaMalloc(&d_cooVals_sorted, sizeof(double)*nnz); + // CHECK: cudaStat6 = hipMalloc(&pBuffer, sizeof(char)* pBufferSizeInBytes); + cudaStat6 = cudaMalloc(&pBuffer, sizeof(char)* pBufferSizeInBytes); + + // CHECK: assert(hipSuccess == cudaStat1); + // CHECK: assert(hipSuccess == cudaStat2); + // CHECK: assert(hipSuccess == cudaStat3); + // CHECK: assert(hipSuccess == cudaStat4); + // CHECK: assert(hipSuccess == cudaStat5); + // CHECK: assert(hipSuccess == cudaStat6); + assert(cudaSuccess == cudaStat1); + assert(cudaSuccess == cudaStat2); + assert(cudaSuccess == cudaStat3); + assert(cudaSuccess == cudaStat4); + assert(cudaSuccess == cudaStat5); + assert(cudaSuccess == cudaStat6); + + // CHECK: cudaStat1 = hipMemcpy(d_cooRows, h_cooRows, sizeof(int)*nnz, hipMemcpyHostToDevice); + cudaStat1 = cudaMemcpy(d_cooRows, h_cooRows, sizeof(int)*nnz, cudaMemcpyHostToDevice); + // CHECK: cudaStat2 = hipMemcpy(d_cooCols, h_cooCols, sizeof(int)*nnz, hipMemcpyHostToDevice); + cudaStat2 = cudaMemcpy(d_cooCols, h_cooCols, sizeof(int)*nnz, cudaMemcpyHostToDevice); + // CHECK: cudaStat3 = hipMemcpy(d_cooVals, h_cooVals, sizeof(double)*nnz, hipMemcpyHostToDevice); + cudaStat3 = cudaMemcpy(d_cooVals, h_cooVals, sizeof(double)*nnz, cudaMemcpyHostToDevice); + // CHECK: cudaStat4 = hipDeviceSynchronize(); + cudaStat4 = cudaDeviceSynchronize(); + + // CHECK: assert(hipSuccess == cudaStat1); + // CHECK: assert(hipSuccess == cudaStat2); + // CHECK: assert(hipSuccess == cudaStat3); + // CHECK: assert(hipSuccess == cudaStat4); + assert(cudaSuccess == cudaStat1); + assert(cudaSuccess == cudaStat2); + assert(cudaSuccess == cudaStat3); + assert(cudaSuccess == cudaStat4); + + /* step 3: setup permutation vector P to identity */ + // TODO: status = hipsparseCreateIdentityPermutation( + status = cusparseCreateIdentityPermutation( + handle, + nnz, + d_P); + // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); + assert(CUSPARSE_STATUS_SUCCESS == status); + + /* step 4: sort COO format by Row */ + // TODO: status = hipsparseXcoosortByRow( + status = cusparseXcoosortByRow( + handle, + m, + n, + nnz, + d_cooRows, + d_cooCols, + d_P, + pBuffer + ); + // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); + assert(CUSPARSE_STATUS_SUCCESS == status); + + /* step 5: gather sorted cooVals */ + // CHECK: status = hipsparseDgthr( + // CHECK: HIPSPARSE_INDEX_BASE_ZERO + status = cusparseDgthr( + handle, + nnz, + d_cooVals, + d_cooVals_sorted, + d_P, + CUSPARSE_INDEX_BASE_ZERO + ); + // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); + assert(CUSPARSE_STATUS_SUCCESS == status); + /* wait until the computation is done */ + // CHECK: cudaStat1 = hipDeviceSynchronize(); + cudaStat1 = cudaDeviceSynchronize(); + // CHECK: cudaStat2 = hipMemcpy(h_cooRows, d_cooRows, sizeof(int)*nnz, hipMemcpyDeviceToHost); + cudaStat2 = cudaMemcpy(h_cooRows, d_cooRows, sizeof(int)*nnz, cudaMemcpyDeviceToHost); + // CHECK: cudaStat3 = hipMemcpy(h_cooCols, d_cooCols, sizeof(int)*nnz, hipMemcpyDeviceToHost); + cudaStat3 = cudaMemcpy(h_cooCols, d_cooCols, sizeof(int)*nnz, cudaMemcpyDeviceToHost); + // CHECK: cudaStat4 = hipMemcpy(h_P, d_P, sizeof(int)*nnz, hipMemcpyDeviceToHost); + cudaStat4 = cudaMemcpy(h_P, d_P, sizeof(int)*nnz, cudaMemcpyDeviceToHost); + // CHECK: cudaStat5 = hipMemcpy(h_cooVals, d_cooVals_sorted, sizeof(double)*nnz, hipMemcpyDeviceToHost); + cudaStat5 = cudaMemcpy(h_cooVals, d_cooVals_sorted, sizeof(double)*nnz, cudaMemcpyDeviceToHost); + // CHECK: cudaStat6 = hipDeviceSynchronize(); + cudaStat6 = cudaDeviceSynchronize(); + // CHECK: assert(hipSuccess == cudaStat1); + // CHECK: assert(hipSuccess == cudaStat2); + // CHECK: assert(hipSuccess == cudaStat3); + // CHECK: assert(hipSuccess == cudaStat4); + // CHECK: assert(hipSuccess == cudaStat5); + // CHECK: assert(hipSuccess == cudaStat6); + assert(cudaSuccess == cudaStat1); + assert(cudaSuccess == cudaStat2); + assert(cudaSuccess == cudaStat3); + assert(cudaSuccess == cudaStat4); + assert(cudaSuccess == cudaStat5); + assert(cudaSuccess == cudaStat6); + + printf("sorted coo: \n"); + for (int j = 0; j < nnz; j++) { + printf("(%d, %d, %f) \n", h_cooRows[j], h_cooCols[j], h_cooVals[j]); + } + + for (int j = 0; j < nnz; j++) { + printf("P[%d] = %d \n", j, h_P[j]); + } + + /* free resources */ + // CHECK: if (d_cooRows) hipFree(d_cooRows); + if (d_cooRows) cudaFree(d_cooRows); + // CHECK: if (d_cooCols) hipFree(d_cooCols); + if (d_cooCols) cudaFree(d_cooCols); + // CHECK: if (d_P) hipFree(d_P); + if (d_P) cudaFree(d_P); + // CHECK: if (d_cooVals) hipFree(d_cooVals); + if (d_cooVals) cudaFree(d_cooVals); + // CHECK: if (d_cooVals_sorted) hipFree(d_cooVals_sorted); + if (d_cooVals_sorted) cudaFree(d_cooVals_sorted); + // CHECK: if (pBuffer) hipFree(pBuffer); + if (pBuffer) cudaFree(pBuffer); + // if (handle) hipsparseDestroy(handle); + if (handle) cusparseDestroy(handle); + // CHECK: if (stream) hipStreamDestroy(stream); + if (stream) cudaStreamDestroy(stream); + // CHECK: hipDeviceReset(); + cudaDeviceReset(); + return 0; +}