diff --git a/projects/clr/hipamd/docs/markdown/CUSPARSE_API_supported_by_HIP.md b/projects/clr/hipamd/docs/markdown/CUSPARSE_API_supported_by_HIP.md index 0d0ded1406..f552246f11 100644 --- a/projects/clr/hipamd/docs/markdown/CUSPARSE_API_supported_by_HIP.md +++ b/projects/clr/hipamd/docs/markdown/CUSPARSE_API_supported_by_HIP.md @@ -300,6 +300,7 @@ |`cusparseScsrgeam` | | |`cusparseDcsrgeam` | | |`cusparseCcsrgeam` | | +|`cusparseZcsrgeam` | | |`cusparseScsrgeam2_bufferSizeExt` | | |`cusparseDcsrgeam2_bufferSizeExt` | | |`cusparseCcsrgeam2_bufferSizeExt` | | @@ -308,6 +309,7 @@ |`cusparseScsrgemm` | | |`cusparseDcsrgemm` | | |`cusparseCcsrgemm` | | +|`cusparseZcsrgemm` | | |`cusparseScsrgemm2_bufferSizeExt` | | |`cusparseDcsrgemm2_bufferSizeExt` | | |`cusparseCcsrgemm2_bufferSizeExt` | | @@ -460,3 +462,157 @@ |`cusparseDgpsvInterleavedBatch` | | |`cusparseCgpsvInterleavedBatch` | | |`cusparseZgpsvInterleavedBatch` | | + +## **8. cuSPARSE Matrix Reorderings Reference** + +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------------------------| +|`cusparseScsrcolor` | | +|`cusparseDcsrcolor` | | +|`cusparseCcsrcolor` | | +|`cusparseZcsrcolor` | | + +## **9. cuSPARSE Format Conversion Reference** + +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------------------------| +|`cusparseSbsr2csr` | | +|`cusparseDbsr2csr` | | +|`cusparseCbsr2csr` | | +|`cusparseZbsr2csr` | | +|`cusparseSgebsr2gebsc_bufferSize` | | +|`cusparseDgebsr2gebsc_bufferSize` | | +|`cusparseCgebsr2gebsc_bufferSize` | | +|`cusparseZgebsr2gebsc_bufferSize` | | +|`cusparseSgebsr2gebsc` | | +|`cusparseDgebsr2gebsc` | | +|`cusparseCgebsr2gebsc` | | +|`cusparseZgebsr2gebsc` | | +|`cusparseSgebsr2gebsr_bufferSize` | | +|`cusparseDgebsr2gebsr_bufferSize` | | +|`cusparseCgebsr2gebsr_bufferSize` | | +|`cusparseZgebsr2gebsr_bufferSize` | | +|`cusparseXgebsr2gebsrNnz` | | +|`cusparseSgebsr2gebsr` | | +|`cusparseDgebsr2gebsr` | | +|`cusparseCgebsr2gebsr` | | +|`cusparseZgebsr2gebsr` | | +|`cusparseSgebsr2csr` | | +|`cusparseDgebsr2csr` | | +|`cusparseCgebsr2csr` | | +|`cusparseZgebsr2csr` | | +|`cusparseScsr2gebsr_bufferSize` | | +|`cusparseDcsr2gebsr_bufferSize` | | +|`cusparseCcsr2gebsr_bufferSize` | | +|`cusparseZcsr2gebsr_bufferSize` | | +|`cusparseXcsr2gebsrNnz` | | +|`cusparseScsr2gebsr` | | +|`cusparseDcsr2gebsr` | | +|`cusparseCcsr2gebsr` | | +|`cusparseZcsr2gebsr` | | +|`cusparseXcoo2csr` |`hipsparseXcoo2csr` | +|`cusparseScsc2dense` | | +|`cusparseDcsc2dense` | | +|`cusparseCcsc2dense` | | +|`cusparseZcsc2dense` | | +|`cusparseScsc2hyb` | | +|`cusparseDcsc2hyb` | | +|`cusparseCcsc2hyb` | | +|`cusparseZcsc2hyb` | | +|`cusparseXcsr2bsrNnz` | | +|`cusparseScsr2bsr` | | +|`cusparseDcsr2bsr` | | +|`cusparseCcsr2bsr` | | +|`cusparseZcsr2bsr` | | +|`cusparseXcsr2coo` |`hipsparseXcsr2coo` | +|`cusparseScsr2csc` |`hipsparseScsr2csc` | +|`cusparseDcsr2csc` |`hipsparseDcsr2csc` | +|`cusparseCcsr2csc` | | +|`cusparseZcsr2csc` | | +|`cusparseCsr2cscEx` | | +|`cusparseScsr2dense` | | +|`cusparseDcsr2dense` | | +|`cusparseCcsr2dense` | | +|`cusparseZcsr2dense` | | +|`cusparseScsr2csr_compress` | | +|`cusparseDcsr2csr_compress` | | +|`cusparseCcsr2csr_compress` | | +|`cusparseZcsr2csr_compress` | | +|`cusparseScsr2hyb` |`hipsparseScsr2hyb` | +|`cusparseDcsr2hyb` |`hipsparseDcsr2hyb` | +|`cusparseCcsr2hyb` | | +|`cusparseZcsr2hyb` | | +|`cusparseSdense2csc` | | +|`cusparseDdense2csc` | | +|`cusparseCdense2csc` | | +|`cusparseZdense2csc` | | +|`cusparseSdense2csr` | | +|`cusparseDdense2csr` | | +|`cusparseCdense2csr` | | +|`cusparseZdense2csr` | | +|`cusparseSdense2hyb` | | +|`cusparseDdense2hyb` | | +|`cusparseCdense2hyb` | | +|`cusparseZdense2hyb` | | +|`cusparseShyb2csc` | | +|`cusparseDhyb2csc` | | +|`cusparseChyb2csc` | | +|`cusparseZhyb2csc` | | +|`cusparseShyb2csr` | | +|`cusparseDhyb2csr` | | +|`cusparseChyb2csr` | | +|`cusparseZhyb2csr` | | +|`cusparseShyb2dense` | | +|`cusparseDhyb2dense` | | +|`cusparseChyb2dense` | | +|`cusparseZhyb2dense` | | +|`cusparseSnnz` | | +|`cusparseDnnz` | | +|`cusparseCnnz` | | +|`cusparseZnnz` | | +|`cusparseCreateIdentityPermutation` |`hipsparseCreateIdentityPermutation` | +|`cusparseXcoosort_bufferSizeExt` |`hipsparseXcoosort_bufferSizeExt` | +|`cusparseXcoosortByRow` |`hipsparseXcoosortByRow` | +|`cusparseXcoosortByColumn` |`hipsparseXcoosortByColumn` | +|`cusparseXcsrsort_bufferSizeExt` |`hipsparseXcsrsort_bufferSizeExt` | +|`cusparseXcsrsort` |`hipsparseXcsrsort` | +|`cusparseScusparseXcscsort_bufferSizeExtnnz` | | +|`cusparseXcscsort` | | +|`cusparseCreateCsru2csrInfo` | | +|`cusparseDestroyCsru2csrInfo` | | +|`cusparseScsru2csr_bufferSizeExt` | | +|`cusparseDcsru2csr_bufferSizeExt` | | +|`cusparseCcsru2csr_bufferSizeExt` | | +|`cusparseZcsru2csr_bufferSizeExt` | | +|`cusparseScsru2csr` | | +|`cusparseDcsru2csr` | | +|`cusparseCcsru2csr` | | +|`cusparseZcsru2csr` | | +|`cusparseHpruneDense2csr_bufferSizeExt` | | +|`cusparseSpruneDense2csr_bufferSizeExt` | | +|`cusparseDpruneDense2csr_bufferSizeExt` | | +|`cusparseHpruneDense2csrNnz` | | +|`cusparseSpruneDense2csrNnz` | | +|`cusparseDpruneDense2csrNnz` | | +|`cusparseHpruneCsr2csr_bufferSizeExt` | | +|`cusparseSpruneCsr2csr_bufferSizeExt` | | +|`cusparseDpruneCsr2csr_bufferSizeExt` | | +|`cusparseHpruneCsr2csrNnz` | | +|`cusparseSpruneCsr2csrNnz` | | +|`cusparseDpruneCsr2csrNnz` | | +|`cusparseHpruneDense2csrByPercentage_bufferSizeExt` | | +|`cusparseSpruneDense2csrByPercentage_bufferSizeExt` | | +|`cusparseDpruneDense2csrByPercentage_bufferSizeExt` | | +|`cusparseHpruneDense2csrNnzByPercentage` | | +|`cusparseSpruneDense2csrNnzByPercentage` | | +|`cusparseDpruneDense2csrNnzByPercentage` | | +|`cusparseHpruneCsr2csrByPercentage_bufferSizeExt` | | +|`cusparseSpruneCsr2csrByPercentage_bufferSizeExt` | | +|`cusparseDpruneCsr2csrByPercentage_bufferSizeExt` | | +|`cusparseHpruneCsr2csrNnzByPercentage` | | +|`cusparseSpruneCsr2csrNnzByPercentage` | | +|`cusparseDpruneCsr2csrNnzByPercentage` | | +|`cusparseSnnz_compress` | | +|`cusparseDnnz_compress` | | +|`cusparseCnnz_compress` | | +|`cusparseZnnz_compress` | | diff --git a/projects/clr/hipamd/hipify-clang/src/CUDA2HIP_SPARSE_API_functions.cpp b/projects/clr/hipamd/hipify-clang/src/CUDA2HIP_SPARSE_API_functions.cpp index fdb1a8381c..f3b0f0eb99 100644 --- a/projects/clr/hipamd/hipify-clang/src/CUDA2HIP_SPARSE_API_functions.cpp +++ b/projects/clr/hipamd/hipify-clang/src/CUDA2HIP_SPARSE_API_functions.cpp @@ -245,6 +245,7 @@ const std::map CUDA_SPARSE_FUNCTION_MAP{ {"cusparseScsrgeam", {"hipsparseScsrgeam", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, {"cusparseDcsrgeam", {"hipsparseDcsrgeam", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, {"cusparseCcsrgeam", {"hipsparseCcsrgeam", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZcsrgeam", {"hipsparseZcsrgeam", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, {"cusparseScsrgeam2_bufferSizeExt", {"hipsparseScsrgeam2_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, {"cusparseDcsrgeam2_bufferSizeExt", {"hipsparseDcsrgeam2_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, @@ -255,6 +256,7 @@ const std::map CUDA_SPARSE_FUNCTION_MAP{ {"cusparseScsrgemm", {"hipsparseScsrgemm", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, {"cusparseDcsrgemm", {"hipsparseDcsrgemm", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, {"cusparseCcsrgemm", {"hipsparseCcsrgemm", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZcsrgemm", {"hipsparseZcsrgemm", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, {"cusparseScsrgemm2_bufferSizeExt", {"hipsparseScsrgemm2_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, {"cusparseDcsrgemm2_bufferSizeExt", {"hipsparseDcsrgemm2_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, @@ -416,4 +418,192 @@ const std::map CUDA_SPARSE_FUNCTION_MAP{ {"cusparseDgpsvInterleavedBatch", {"hipsparseDgpsvInterleavedBatch", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, {"cusparseCgpsvInterleavedBatch", {"hipsparseCgpsvInterleavedBatch", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, {"cusparseZgpsvInterleavedBatch", {"hipsparseZgpsvInterleavedBatch", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + // 11. cuSPARSE Matrix Reorderings Reference + {"cusparseScsrcolor", {"hipsparseScsrcolor", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDcsrcolor", {"hipsparseDcsrcolor", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCcsrcolor", {"hipsparseCcsrcolor", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZcsrcolor", {"hipsparseZcsrcolor", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + // 12. cuSPARSE Format Conversion Reference + {"cusparseSbsr2csr", {"hipsparseSbsr2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDbsr2csr", {"hipsparseDbsr2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCbsr2csr", {"hipsparseCbsr2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZbsr2csr", {"hipsparseZbsr2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseSgebsr2gebsc_bufferSize", {"hipsparseSgebsr2gebsc_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDgebsr2gebsc_bufferSize", {"hipsparseDgebsr2gebsc_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCgebsr2gebsc_bufferSize", {"hipsparseCgebsr2gebsc_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZgebsr2gebsc_bufferSize", {"hipsparseZgebsr2gebsc_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseSgebsr2gebsc", {"hipsparseSgebsr2gebsc", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDgebsr2gebsc", {"hipsparseDgebsr2gebsc", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCgebsr2gebsc", {"hipsparseCgebsr2gebsc", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZgebsr2gebsc", {"hipsparseZgebsr2gebsc", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseSgebsr2gebsr_bufferSize", {"hipsparseSgebsr2gebsr_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDgebsr2gebsr_bufferSize", {"hipsparseDgebsr2gebsr_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCgebsr2gebsr_bufferSize", {"hipsparseCgebsr2gebsr_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZgebsr2gebsr_bufferSize", {"hipsparseZgebsr2gebsr_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseSgebsr2csr", {"hipsparseSgebsr2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDgebsr2csr", {"hipsparseDgebsr2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCgebsr2csr", {"hipsparseCgebsr2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZgebsr2csr", {"hipsparseZgebsr2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseXgebsr2gebsrNnz", {"hipsparseXgebsr2gebsrNnz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseSgebsr2gebsr", {"hipsparseSgebsr2gebsr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDgebsr2gebsr", {"hipsparseDgebsr2gebsr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCgebsr2gebsr", {"hipsparseCgebsr2gebsr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZgebsr2gebsr", {"hipsparseZgebsr2gebsr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseScsr2gebsr_bufferSize", {"hipsparseScsr2gebsr_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDcsr2gebsr_bufferSize", {"hipsparseDcsr2gebsr_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCcsr2gebsr_bufferSize", {"hipsparseCcsr2gebsr_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZcsr2gebsr_bufferSize", {"hipsparseZcsr2gebsr_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseXcsr2gebsrNnz", {"hipsparseXcsr2gebsrNnz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseScsr2gebsr", {"hipsparseScsr2gebsr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDcsr2gebsr", {"hipsparseDcsr2gebsr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCcsr2gebsr", {"hipsparseCcsr2gebsr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZcsr2gebsr", {"hipsparseZcsr2gebsr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseXcoo2csr", {"hipsparseXcoo2csr", CONV_LIB_FUNC, API_SPARSE}}, + + {"cusparseScsc2dense", {"hipsparseScsc2dense", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDcsc2dense", {"hipsparseDcsc2dense", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCcsc2dense", {"hipsparseCcsc2dense", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZcsc2dense", {"hipsparseZcsc2dense", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseScsc2hyb", {"hipsparseScsc2hyb", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDcsc2hyb", {"hipsparseDcsc2hyb", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCcsc2hyb", {"hipsparseCcsc2hyb", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZcsc2hyb", {"hipsparseZcsc2hyb", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseXcsr2bsrNnz", {"hipsparseXcsr2bsrNnz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseScsr2bsr", {"hipsparseScsr2bsr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDcsr2bsr", {"hipsparseDcsr2bsr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCcsr2bsr", {"hipsparseCcsr2bsr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZcsr2bsr", {"hipsparseZcsr2bsr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseXcsr2coo", {"hipsparseXcsr2coo", CONV_LIB_FUNC, API_SPARSE}}, + + {"cusparseScsr2csc", {"hipsparseScsr2csc", CONV_LIB_FUNC, API_SPARSE}}, + {"cusparseDcsr2csc", {"hipsparseDcsr2csc", CONV_LIB_FUNC, API_SPARSE}}, + {"cusparseCcsr2csc", {"hipsparseCcsr2csc", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZcsr2csc", {"hipsparseZcsr2csc", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseCsr2cscEx", {"hipsparseCsr2cscEx", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseScsr2dense", {"hipsparseScsr2dense", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDcsr2dense", {"hipsparseDcsr2dense", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCcsr2dense", {"hipsparseCcsr2dense", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZcsr2dense", {"hipsparseZcsr2dense", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseScsr2csr_compress", {"hipsparseScsr2csr_compress", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDcsr2csr_compress", {"hipsparseDcsr2csr_compress", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDcsr2csr_compress", {"hipsparseDcsr2csr_compress", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZcsr2csr_compress", {"hipsparseZcsr2csr_compress", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseScsr2hyb", {"hipsparseScsr2hyb", CONV_LIB_FUNC, API_SPARSE}}, + {"cusparseDcsr2hyb", {"hipsparseDcsr2hyb", CONV_LIB_FUNC, API_SPARSE}}, + {"cusparseCcsr2hyb", {"hipsparseCcsr2hyb", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZcsr2hyb", {"hipsparseZcsr2hyb", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseSdense2csc", {"hipsparseSdense2csc", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDdense2csc", {"hipsparseDdense2csc", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCdense2csc", {"hipsparseCdense2csc", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZdense2csc", {"hipsparseZdense2csc", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseSdense2csr", {"hipsparseSdense2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDdense2csr", {"hipsparseDdense2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCdense2csr", {"hipsparseCdense2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZdense2csr", {"hipsparseZdense2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseSdense2hyb", {"hipsparseSdense2hyb", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDdense2hyb", {"hipsparseDdense2hyb", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCdense2hyb", {"hipsparseCdense2hyb", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZdense2hyb", {"hipsparseZdense2hyb", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseShyb2csc", {"hipsparseShyb2csc", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDhyb2csc", {"hipsparseDhyb2csc", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseChyb2csc", {"hipsparseChyb2csc", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZhyb2csc", {"hipsparseZhyb2csc", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseShyb2csr", {"hipsparseShyb2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDhyb2csr", {"hipsparseDhyb2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseChyb2csr", {"hipsparseChyb2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZhyb2csr", {"hipsparseZhyb2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseShyb2dense", {"hipsparseShyb2dense", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDhyb2dense", {"hipsparseDhyb2dense", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseChyb2dense", {"hipsparseChyb2dense", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZhyb2dense", {"hipsparseZhyb2dense", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseSnnz", {"hipsparseSnnz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDnnz", {"hipsparseDnnz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCnnz", {"hipsparseCnnz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZnnz", {"hipsparseZnnz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseCreateIdentityPermutation", {"hipsparseCreateIdentityPermutation", CONV_LIB_FUNC, API_SPARSE}}, + + {"cusparseXcoosort_bufferSizeExt", {"hipsparseXcoosort_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE}}, + {"cusparseXcoosortByRow", {"hipsparseXcoosortByRow", CONV_LIB_FUNC, API_SPARSE}}, + {"cusparseXcoosortByColumn", {"hipsparseXcoosortByColumn", CONV_LIB_FUNC, API_SPARSE}}, + + {"cusparseXcsrsort_bufferSizeExt", {"hipsparseXcsrsort_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE}}, + {"cusparseXcsrsort", {"hipsparseXcsrsort", CONV_LIB_FUNC, API_SPARSE}}, + + {"cusparseScusparseXcscsort_bufferSizeExtnnz", {"hipsparseXcscsort_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseXcscsort", {"hipsparseXcscsort", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseCreateCsru2csrInfo", {"hipsparseCreateCsru2csrInfo", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDestroyCsru2csrInfo", {"hipsparseDestroyCsru2csrInfo", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseScsru2csr_bufferSizeExt", {"hipsparseScsru2csr_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDcsru2csr_bufferSizeExt", {"hipsparseDcsru2csr_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCcsru2csr_bufferSizeExt", {"hipsparseCcsru2csr_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZcsru2csr_bufferSizeExt", {"hipsparseZcsru2csr_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseScsru2csr", {"hipsparseScsru2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDcsru2csr", {"hipsparseDcsru2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCcsru2csr", {"hipsparseCcsru2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZcsru2csr", {"hipsparseZcsru2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseHpruneDense2csr_bufferSizeExt", {"hipsparseHpruneDense2csr_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseSpruneDense2csr_bufferSizeExt", {"hipsparseSpruneDense2csr_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDpruneDense2csr_bufferSizeExt", {"hipsparseDpruneDense2csr_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseHpruneDense2csrNnz", {"hipsparseHpruneDense2csrNnz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseSpruneDense2csrNnz", {"hipsparseSpruneDense2csrNnz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDpruneDense2csrNnz", {"hipsparseDpruneDense2csrNnz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseHpruneCsr2csr_bufferSizeExt", {"hipsparseHpruneCsr2csr_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseSpruneCsr2csr_bufferSizeExt", {"hipsparseSpruneCsr2csr_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDpruneCsr2csr_bufferSizeExt", {"hipsparseDpruneCsr2csr_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseHpruneCsr2csrNnz", {"hipsparseHpruneCsr2csrNnz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseSpruneCsr2csrNnz", {"hipsparseSpruneCsr2csrNnz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDpruneCsr2csrNnz", {"hipsparseDpruneCsr2csrNnz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseHpruneDense2csrByPercentage_bufferSizeExt", {"hipsparseHpruneDense2csrByPercentage_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseSpruneDense2csrByPercentage_bufferSizeExt", {"hipsparseSpruneDense2csrByPercentage_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDpruneDense2csrByPercentage_bufferSizeExt", {"hipsparseDpruneDense2csrByPercentage_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseHpruneDense2csrNnzByPercentage", {"hipsparseHpruneDense2csrNnzByPercentage", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseSpruneDense2csrNnzByPercentage", {"hipsparseSpruneDense2csrNnzByPercentage", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDpruneDense2csrNnzByPercentage", {"hipsparseDpruneDense2csrNnzByPercentage", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseHpruneCsr2csrByPercentage_bufferSizeExt", {"hipsparseHpruneCsr2csrByPercentage_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseSpruneCsr2csrByPercentage_bufferSizeExt", {"hipsparseSpruneCsr2csrByPercentage_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDpruneCsr2csrByPercentage_bufferSizeExt", {"hipsparseDpruneCsr2csrByPercentage_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseHpruneCsr2csrNnzByPercentage", {"hipsparseHpruneCsr2csrNnzByPercentage", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseSpruneCsr2csrNnzByPercentage", {"hipsparseSpruneCsr2csrNnzByPercentage", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDpruneCsr2csrNnzByPercentage", {"hipsparseDpruneCsr2csrNnzByPercentage", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseSnnz_compress", {"hipsparseSnnz_compress", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDnnz_compress", {"hipsparseDnnz_compress", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCnnz_compress", {"hipsparseCnnz_compress", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZnnz_compress", {"hipsparseZnnz_compress", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, }; diff --git a/projects/clr/hipamd/tests/hipify-clang/cuSPARSE/cuSPARSE_08.cu b/projects/clr/hipamd/tests/hipify-clang/cuSPARSE/cuSPARSE_08.cu new file mode 100644 index 0000000000..fcfde8d3b2 --- /dev/null +++ b/projects/clr/hipamd/tests/hipify-clang/cuSPARSE/cuSPARSE_08.cu @@ -0,0 +1,413 @@ +// RUN: %run_test hipify "%s" "%t" %cuda_args +#include +#include +#include +// CHECK: #include +#include +// CHECK: #include +#include +// CHECK: #include +#include + +// NOTE: CUDA 10.0 + +/* + * compute | b - A*x|_inf + */ +void residaul_eval( + int n, + const float *dl, + const float *d, + const float *du, + const float *b, + const float *x, + float *r_nrminf_ptr) +{ + float r_nrminf = 0; + for (int i = 0; i < n; i++) { + float dot = 0; + if (i > 0) { + dot += dl[i] * x[i - 1]; + } + dot += d[i] * x[i]; + if (i < (n - 1)) { + dot += du[i] * x[i + 1]; + } + float ri = b[i] - dot; + r_nrminf = (r_nrminf > fabs(ri)) ? r_nrminf : fabs(ri); + } + + *r_nrminf_ptr = r_nrminf; +} + +int main(int argc, char*argv[]) +{ + // CHECK: hipsparseHandle_t cusparseH = NULL; + cusparseHandle_t cusparseH = NULL; + // CHECK: hipblasHandle_t cublasH = NULL; + cublasHandle_t cublasH = NULL; + // CHECK: hipStream_t stream = NULL; + cudaStream_t stream = NULL; + // CHECK: hipsparseStatus_t status = HIPSPARSE_STATUS_SUCCESS; + cusparseStatus_t status = CUSPARSE_STATUS_SUCCESS; + // CHECK: hipblasStatus_t cublasStat = HIPBLAS_STATUS_SUCCESS; + cublasStatus_t cublasStat = CUBLAS_STATUS_SUCCESS; + // CHECK: hipError_t cudaStat1 = hipSuccess; + cudaError_t cudaStat1 = cudaSuccess; + + const int n = 3; + const int batchSize = 2; + /* + * | 1 6 0 | | 1 | | -0.603960 | + * A1 =| 4 2 7 |, b1 = | 2 |, x1 = | 0.267327 | + * | 0 5 3 | | 3 | | 0.554455 | + * + * | 8 13 0 | | 4 | | -0.063291 | + * A2 =| 11 9 14 |, b2 = | 5 |, x2 = | 0.346641 | + * | 0 12 10 | | 6 | | 0.184031 | + */ + + /* + * A = (dl, d, du), B and X are in aggregate format + */ + const float dl[n * batchSize] = { 0, 4, 5, 0, 11, 12 }; + const float d[n * batchSize] = { 1, 2, 3, 8, 9, 10 }; + const float du[n * batchSize] = { 6, 7, 0, 13, 14, 0 }; + const float B[n * batchSize] = { 1, 2, 3, 4, 5, 6 }; + float X[n * batchSize]; /* Xj = Aj \ Bj */ + +/* device memory + * (d_dl0, d_d0, d_du0) is aggregate format + * (d_dl, d_d, d_du) is interleaved format + */ + float *d_dl0 = NULL; + float *d_d0 = NULL; + float *d_du0 = NULL; + float *d_dl = NULL; + float *d_d = NULL; + float *d_du = NULL; + float *d_B = NULL; + float *d_X = NULL; + + size_t lworkInBytes = 0; + char *d_work = NULL; + + /* + * algo = 0: cuThomas (unstable) + * algo = 1: LU with pivoting (stable) + * algo = 2: QR (stable) + */ + const int algo = 2; + + const float h_one = 1; + const float h_zero = 0; + + printf("example of gtsv (interleaved format) \n"); + printf("choose algo = 0,1,2 to select different algorithms \n"); + printf("n = %d, batchSize = %d, algo = %d \n", n, batchSize, algo); + + /* step 1: create cusparse/cublas handle, bind a stream */ + // CHECK: cudaStat1 = hipStreamCreateWithFlags(&stream, hipStreamNonBlocking); + cudaStat1 = cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + // CHECK: status = hipsparseCreate(&cusparseH); + status = cusparseCreate(&cusparseH); + //CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); + assert(CUSPARSE_STATUS_SUCCESS == status); + // CHECK: status = hipsparseSetStream(cusparseH, stream); + status = cusparseSetStream(cusparseH, stream); + //CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); + assert(CUSPARSE_STATUS_SUCCESS == status); + // CHECK: cublasStat = hipblasCreate(&cublasH); + cublasStat = cublasCreate(&cublasH); + // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); + assert(CUBLAS_STATUS_SUCCESS == cublasStat); + // CHECK: cublasStat = hipblasSetStream(cublasH, stream); + cublasStat = cublasSetStream(cublasH, stream); + // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); + assert(CUBLAS_STATUS_SUCCESS == cublasStat); + + /* step 2: allocate device memory */ + // CHECK: cudaStat1 = hipMalloc((void**)&d_dl0, sizeof(float)*n*batchSize); + cudaStat1 = cudaMalloc((void**)&d_dl0, sizeof(float)*n*batchSize); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + // CHECK: cudaStat1 = hipMalloc((void**)&d_d0, sizeof(float)*n*batchSize); + cudaStat1 = cudaMalloc((void**)&d_d0, sizeof(float)*n*batchSize); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + // CHECK: cudaStat1 = hipMalloc((void**)&d_du0, sizeof(float)*n*batchSize); + cudaStat1 = cudaMalloc((void**)&d_du0, sizeof(float)*n*batchSize); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + // CHECK: cudaStat1 = hipMalloc((void**)&d_dl, sizeof(float)*n*batchSize); + cudaStat1 = cudaMalloc((void**)&d_dl, sizeof(float)*n*batchSize); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + // CHECK: cudaStat1 = hipMalloc((void**)&d_d, sizeof(float)*n*batchSize); + cudaStat1 = cudaMalloc((void**)&d_d, sizeof(float)*n*batchSize); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + // CHECK: cudaStat1 = hipMalloc((void**)&d_du, sizeof(float)*n*batchSize); + cudaStat1 = cudaMalloc((void**)&d_du, sizeof(float)*n*batchSize); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + // CHECK: cudaStat1 = hipMalloc((void**)&d_B, sizeof(float)*n*batchSize); + cudaStat1 = cudaMalloc((void**)&d_B, sizeof(float)*n*batchSize); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + // CHECK: cudaStat1 = hipMalloc((void**)&d_X, sizeof(float)*n*batchSize); + cudaStat1 = cudaMalloc((void**)&d_X, sizeof(float)*n*batchSize); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + + /* step 3: prepare data in device, interleaved format */ + // CHECK: cudaStat1 = hipMemcpy(d_dl0, dl, sizeof(float)*n*batchSize, hipMemcpyHostToDevice); + cudaStat1 = cudaMemcpy(d_dl0, dl, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + // CHECK: cudaStat1 = hipMemcpy(d_d0, d, sizeof(float)*n*batchSize, hipMemcpyHostToDevice); + cudaStat1 = cudaMemcpy(d_d0, d, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + // CHECK: cudaStat1 = hipMemcpy(d_du0, du, sizeof(float)*n*batchSize, hipMemcpyHostToDevice); + cudaStat1 = cudaMemcpy(d_du0, du, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + // CHECK: cudaStat1 = hipMemcpy(d_B, B, sizeof(float)*n*batchSize, hipMemcpyHostToDevice); + cudaStat1 = cudaMemcpy(d_B, B, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + // CHECK: hipDeviceSynchronize(); + cudaDeviceSynchronize(); + /* convert dl to interleaved format + * dl = transpose(dl0) + */ + // CHECK: cublasStat = hipblasSgeam( + // CHECK: HIPBLAS_OP_T, + // CHECK: HIPBLAS_OP_T, + cublasStat = cublasSgeam( + cublasH, + CUBLAS_OP_T, /* transa */ + CUBLAS_OP_T, /* transb, don't care */ + batchSize, /* number of rows of dl */ + n, /* number of columns of dl */ + &h_one, + d_dl0, /* dl0 is n-by-batchSize */ + n, /* leading dimension of dl0 */ + &h_zero, + NULL, + n, /* don't cae */ + d_dl, /* dl is batchSize-by-n */ + batchSize /* leading dimension of dl */ + ); + // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); + assert(CUBLAS_STATUS_SUCCESS == cublasStat); + /* convert d to interleaved format + * d = transpose(d0) + */ + // CHECK: cublasStat = hipblasSgeam( + // CHECK: HIPBLAS_OP_T, + // CHECK: HIPBLAS_OP_T, + cublasStat = cublasSgeam( + cublasH, + CUBLAS_OP_T, /* transa */ + CUBLAS_OP_T, /* transb, don't care */ + batchSize, /* number of rows of d */ + n, /* number of columns of d */ + &h_one, + d_d0, /* d0 is n-by-batchSize */ + n, /* leading dimension of d0 */ + &h_zero, + NULL, + n, /* don't cae */ + d_d, /* d is batchSize-by-n */ + batchSize /* leading dimension of d */ + ); + // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); + assert(CUBLAS_STATUS_SUCCESS == cublasStat); + + /* convert du to interleaved format + * du = transpose(du0) + */ + // CHECK: cublasStat = hipblasSgeam( + // CHECK: HIPBLAS_OP_T, + // CHECK: HIPBLAS_OP_T, + cublasStat = cublasSgeam( + cublasH, + CUBLAS_OP_T, /* transa */ + CUBLAS_OP_T, /* transb, don't care */ + batchSize, /* number of rows of du */ + n, /* number of columns of du */ + &h_one, + d_du0, /* du0 is n-by-batchSize */ + n, /* leading dimension of du0 */ + &h_zero, + NULL, + n, /* don't cae */ + d_du, /* du is batchSize-by-n */ + batchSize /* leading dimension of du */ + ); + // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); + assert(CUBLAS_STATUS_SUCCESS == cublasStat); + + /* convert B to interleaved format + * X = transpose(B) + */ + // CHECK: cublasStat = hipblasSgeam( + // CHECK: HIPBLAS_OP_T, + // CHECK: HIPBLAS_OP_T, + cublasStat = cublasSgeam( + cublasH, + CUBLAS_OP_T, /* transa */ + CUBLAS_OP_T, /* transb, don't care */ + batchSize, /* number of rows of X */ + n, /* number of columns of X */ + &h_one, + d_B, /* B is n-by-batchSize */ + n, /* leading dimension of B */ + &h_zero, + NULL, + n, /* don't cae */ + d_X, /* X is batchSize-by-n */ + batchSize /* leading dimension of X */ + ); + // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); + assert(CUBLAS_STATUS_SUCCESS == cublasStat); + /* step 4: prepare workspace */ + // NOTE: CUDA 10.0 + // CHECK: status = hipsparseSgtsvInterleavedBatch_bufferSizeExt( + status = cusparseSgtsvInterleavedBatch_bufferSizeExt( + cusparseH, + algo, + n, + d_dl, + d_d, + d_du, + d_X, + batchSize, + &lworkInBytes); + // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); + assert(CUSPARSE_STATUS_SUCCESS == status); + + printf("lworkInBytes = %lld \n", (long long)lworkInBytes); + // CHECK: cudaStat1 = hipMalloc((void**)&d_work, lworkInBytes); + cudaStat1 = cudaMalloc((void**)&d_work, lworkInBytes); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + + /* step 5: solve Aj*xj = bj */ + // NOTE: CUDA 10.0 + // CHECK: status = hipsparseSgtsvInterleavedBatch( + status = cusparseSgtsvInterleavedBatch( + cusparseH, + algo, + n, + d_dl, + d_d, + d_du, + d_X, + batchSize, + d_work); + // CHECK: cudaStat1 = hipDeviceSynchronize(); + cudaStat1 = cudaDeviceSynchronize(); + // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); + assert(CUSPARSE_STATUS_SUCCESS == status); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + + /* step 6: convert X back to aggregate format */ + /* B = transpose(X) */ + // CHECK: cublasStat = hipblasSgeam( + // CHECK: HIPBLAS_OP_T, + // CHECK: HIPBLAS_OP_T, + cublasStat = cublasSgeam( + cublasH, + CUBLAS_OP_T, /* transa */ + CUBLAS_OP_T, /* transb, don't care */ + n, /* number of rows of B */ + batchSize, /* number of columns of B */ + &h_one, + d_X, /* X is batchSize-by-n */ + batchSize, /* leading dimension of X */ + &h_zero, + NULL, + n, /* don't cae */ + d_B, /* B is n-by-batchSize */ + n /* leading dimension of B */ + ); + // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); + assert(CUBLAS_STATUS_SUCCESS == cublasStat); + // CHECK: hipDeviceSynchronize(); + cudaDeviceSynchronize(); + + /* step 7: residual evaluation */ + // CHECK: cudaStat1 = hipMemcpy(X, d_B, sizeof(float)*n*batchSize, hipMemcpyDeviceToHost); + cudaStat1 = cudaMemcpy(X, d_B, sizeof(float)*n*batchSize, cudaMemcpyDeviceToHost); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + // CHECK: hipDeviceSynchronize(); + cudaDeviceSynchronize(); + printf("==== x1 = inv(A1)*b1 \n"); + for (int j = 0; j < n; j++) { + printf("x1[%d] = %f\n", j, X[j]); + } + + float r1_nrminf; + residaul_eval( + n, + dl, + d, + du, + B, + X, + &r1_nrminf + ); + printf("|b1 - A1*x1| = %E\n", r1_nrminf); + + printf("\n==== x2 = inv(A2)*b2 \n"); + for (int j = 0; j < n; j++) { + printf("x2[%d] = %f\n", j, X[n + j]); + } + + float r2_nrminf; + residaul_eval( + n, + dl + n, + d + n, + du + n, + B + n, + X + n, + &r2_nrminf + ); + printf("|b2 - A2*x2| = %E\n", r2_nrminf); + + /* free resources */ + // CHECK: if (d_dl0) hipFree(d_dl0); + if (d_dl0) cudaFree(d_dl0); + // CHECK: if (d_d0) hipFree(d_d0); + if (d_d0) cudaFree(d_d0); + // CHECK: if (d_du0) hipFree(d_du0); + if (d_du0) cudaFree(d_du0); + // CHECK: if (d_dl) hipFree(d_dl); + if (d_dl) cudaFree(d_dl); + // CHECK: if (d_d) hipFree(d_d); + if (d_d) cudaFree(d_d); + // CHECK: if (d_du) hipFree(d_du); + if (d_du) cudaFree(d_du); + // CHECK: if (d_B) hipFree(d_B); + if (d_B) cudaFree(d_B); + // CHECK: if (d_X) hipFree(d_X); + if (d_X) cudaFree(d_X); + // CHECK: if (cusparseH) hipsparseDestroy(cusparseH); + if (cusparseH) cusparseDestroy(cusparseH); + // CHECK: if (cublasH) hipblasDestroy(cublasH); + if (cublasH) cublasDestroy(cublasH); + // CHECK: if (stream) hipStreamDestroy(stream); + if (stream) cudaStreamDestroy(stream); + // CHECK: hipDeviceReset(); + cudaDeviceReset(); + + return 0; +} diff --git a/projects/clr/hipamd/tests/hipify-clang/lit.cfg b/projects/clr/hipamd/tests/hipify-clang/lit.cfg index 98961fe166..b9ff0b2495 100644 --- a/projects/clr/hipamd/tests/hipify-clang/lit.cfg +++ b/projects/clr/hipamd/tests/hipify-clang/lit.cfg @@ -23,6 +23,10 @@ config.test_source_root = os.path.dirname(__file__) config.excludes = ['cmdparser.hpp'] +config.cuda_version = "@CUDA_VERSION@" +if config.cuda_version not in ['10.0']: + config.excludes.append('cuSPARSE_08.cu') + # test_exec_root: The path where tests are located (default is the test suite root). #config.test_exec_root = config.test_source_root