Merge pull request #784 from emankov/master
[HIPIFY][SPARSE] Matrix Reorderings and Format Conversion Reference
[ROCm/clr commit: 7de2948334]
This commit is contained in:
@@ -300,6 +300,7 @@
|
||||
|`cusparseScsrgeam` | |
|
||||
|`cusparseDcsrgeam` | |
|
||||
|`cusparseCcsrgeam` | |
|
||||
|`cusparseZcsrgeam` | |
|
||||
|`cusparseScsrgeam2_bufferSizeExt` | |
|
||||
|`cusparseDcsrgeam2_bufferSizeExt` | |
|
||||
|`cusparseCcsrgeam2_bufferSizeExt` | |
|
||||
@@ -308,6 +309,7 @@
|
||||
|`cusparseScsrgemm` | |
|
||||
|`cusparseDcsrgemm` | |
|
||||
|`cusparseCcsrgemm` | |
|
||||
|`cusparseZcsrgemm` | |
|
||||
|`cusparseScsrgemm2_bufferSizeExt` | |
|
||||
|`cusparseDcsrgemm2_bufferSizeExt` | |
|
||||
|`cusparseCcsrgemm2_bufferSizeExt` | |
|
||||
@@ -460,3 +462,157 @@
|
||||
|`cusparseDgpsvInterleavedBatch` | |
|
||||
|`cusparseCgpsvInterleavedBatch` | |
|
||||
|`cusparseZgpsvInterleavedBatch` | |
|
||||
|
||||
## **8. cuSPARSE Matrix Reorderings Reference**
|
||||
|
||||
| **CUDA** | **HIP** |
|
||||
|-----------------------------------------------------------|-------------------------------------------------|
|
||||
|`cusparseScsrcolor` | |
|
||||
|`cusparseDcsrcolor` | |
|
||||
|`cusparseCcsrcolor` | |
|
||||
|`cusparseZcsrcolor` | |
|
||||
|
||||
## **9. cuSPARSE Format Conversion Reference**
|
||||
|
||||
| **CUDA** | **HIP** |
|
||||
|-----------------------------------------------------------|-------------------------------------------------|
|
||||
|`cusparseSbsr2csr` | |
|
||||
|`cusparseDbsr2csr` | |
|
||||
|`cusparseCbsr2csr` | |
|
||||
|`cusparseZbsr2csr` | |
|
||||
|`cusparseSgebsr2gebsc_bufferSize` | |
|
||||
|`cusparseDgebsr2gebsc_bufferSize` | |
|
||||
|`cusparseCgebsr2gebsc_bufferSize` | |
|
||||
|`cusparseZgebsr2gebsc_bufferSize` | |
|
||||
|`cusparseSgebsr2gebsc` | |
|
||||
|`cusparseDgebsr2gebsc` | |
|
||||
|`cusparseCgebsr2gebsc` | |
|
||||
|`cusparseZgebsr2gebsc` | |
|
||||
|`cusparseSgebsr2gebsr_bufferSize` | |
|
||||
|`cusparseDgebsr2gebsr_bufferSize` | |
|
||||
|`cusparseCgebsr2gebsr_bufferSize` | |
|
||||
|`cusparseZgebsr2gebsr_bufferSize` | |
|
||||
|`cusparseXgebsr2gebsrNnz` | |
|
||||
|`cusparseSgebsr2gebsr` | |
|
||||
|`cusparseDgebsr2gebsr` | |
|
||||
|`cusparseCgebsr2gebsr` | |
|
||||
|`cusparseZgebsr2gebsr` | |
|
||||
|`cusparseSgebsr2csr` | |
|
||||
|`cusparseDgebsr2csr` | |
|
||||
|`cusparseCgebsr2csr` | |
|
||||
|`cusparseZgebsr2csr` | |
|
||||
|`cusparseScsr2gebsr_bufferSize` | |
|
||||
|`cusparseDcsr2gebsr_bufferSize` | |
|
||||
|`cusparseCcsr2gebsr_bufferSize` | |
|
||||
|`cusparseZcsr2gebsr_bufferSize` | |
|
||||
|`cusparseXcsr2gebsrNnz` | |
|
||||
|`cusparseScsr2gebsr` | |
|
||||
|`cusparseDcsr2gebsr` | |
|
||||
|`cusparseCcsr2gebsr` | |
|
||||
|`cusparseZcsr2gebsr` | |
|
||||
|`cusparseXcoo2csr` |`hipsparseXcoo2csr` |
|
||||
|`cusparseScsc2dense` | |
|
||||
|`cusparseDcsc2dense` | |
|
||||
|`cusparseCcsc2dense` | |
|
||||
|`cusparseZcsc2dense` | |
|
||||
|`cusparseScsc2hyb` | |
|
||||
|`cusparseDcsc2hyb` | |
|
||||
|`cusparseCcsc2hyb` | |
|
||||
|`cusparseZcsc2hyb` | |
|
||||
|`cusparseXcsr2bsrNnz` | |
|
||||
|`cusparseScsr2bsr` | |
|
||||
|`cusparseDcsr2bsr` | |
|
||||
|`cusparseCcsr2bsr` | |
|
||||
|`cusparseZcsr2bsr` | |
|
||||
|`cusparseXcsr2coo` |`hipsparseXcsr2coo` |
|
||||
|`cusparseScsr2csc` |`hipsparseScsr2csc` |
|
||||
|`cusparseDcsr2csc` |`hipsparseDcsr2csc` |
|
||||
|`cusparseCcsr2csc` | |
|
||||
|`cusparseZcsr2csc` | |
|
||||
|`cusparseCsr2cscEx` | |
|
||||
|`cusparseScsr2dense` | |
|
||||
|`cusparseDcsr2dense` | |
|
||||
|`cusparseCcsr2dense` | |
|
||||
|`cusparseZcsr2dense` | |
|
||||
|`cusparseScsr2csr_compress` | |
|
||||
|`cusparseDcsr2csr_compress` | |
|
||||
|`cusparseCcsr2csr_compress` | |
|
||||
|`cusparseZcsr2csr_compress` | |
|
||||
|`cusparseScsr2hyb` |`hipsparseScsr2hyb` |
|
||||
|`cusparseDcsr2hyb` |`hipsparseDcsr2hyb` |
|
||||
|`cusparseCcsr2hyb` | |
|
||||
|`cusparseZcsr2hyb` | |
|
||||
|`cusparseSdense2csc` | |
|
||||
|`cusparseDdense2csc` | |
|
||||
|`cusparseCdense2csc` | |
|
||||
|`cusparseZdense2csc` | |
|
||||
|`cusparseSdense2csr` | |
|
||||
|`cusparseDdense2csr` | |
|
||||
|`cusparseCdense2csr` | |
|
||||
|`cusparseZdense2csr` | |
|
||||
|`cusparseSdense2hyb` | |
|
||||
|`cusparseDdense2hyb` | |
|
||||
|`cusparseCdense2hyb` | |
|
||||
|`cusparseZdense2hyb` | |
|
||||
|`cusparseShyb2csc` | |
|
||||
|`cusparseDhyb2csc` | |
|
||||
|`cusparseChyb2csc` | |
|
||||
|`cusparseZhyb2csc` | |
|
||||
|`cusparseShyb2csr` | |
|
||||
|`cusparseDhyb2csr` | |
|
||||
|`cusparseChyb2csr` | |
|
||||
|`cusparseZhyb2csr` | |
|
||||
|`cusparseShyb2dense` | |
|
||||
|`cusparseDhyb2dense` | |
|
||||
|`cusparseChyb2dense` | |
|
||||
|`cusparseZhyb2dense` | |
|
||||
|`cusparseSnnz` | |
|
||||
|`cusparseDnnz` | |
|
||||
|`cusparseCnnz` | |
|
||||
|`cusparseZnnz` | |
|
||||
|`cusparseCreateIdentityPermutation` |`hipsparseCreateIdentityPermutation` |
|
||||
|`cusparseXcoosort_bufferSizeExt` |`hipsparseXcoosort_bufferSizeExt` |
|
||||
|`cusparseXcoosortByRow` |`hipsparseXcoosortByRow` |
|
||||
|`cusparseXcoosortByColumn` |`hipsparseXcoosortByColumn` |
|
||||
|`cusparseXcsrsort_bufferSizeExt` |`hipsparseXcsrsort_bufferSizeExt` |
|
||||
|`cusparseXcsrsort` |`hipsparseXcsrsort` |
|
||||
|`cusparseScusparseXcscsort_bufferSizeExtnnz` | |
|
||||
|`cusparseXcscsort` | |
|
||||
|`cusparseCreateCsru2csrInfo` | |
|
||||
|`cusparseDestroyCsru2csrInfo` | |
|
||||
|`cusparseScsru2csr_bufferSizeExt` | |
|
||||
|`cusparseDcsru2csr_bufferSizeExt` | |
|
||||
|`cusparseCcsru2csr_bufferSizeExt` | |
|
||||
|`cusparseZcsru2csr_bufferSizeExt` | |
|
||||
|`cusparseScsru2csr` | |
|
||||
|`cusparseDcsru2csr` | |
|
||||
|`cusparseCcsru2csr` | |
|
||||
|`cusparseZcsru2csr` | |
|
||||
|`cusparseHpruneDense2csr_bufferSizeExt` | |
|
||||
|`cusparseSpruneDense2csr_bufferSizeExt` | |
|
||||
|`cusparseDpruneDense2csr_bufferSizeExt` | |
|
||||
|`cusparseHpruneDense2csrNnz` | |
|
||||
|`cusparseSpruneDense2csrNnz` | |
|
||||
|`cusparseDpruneDense2csrNnz` | |
|
||||
|`cusparseHpruneCsr2csr_bufferSizeExt` | |
|
||||
|`cusparseSpruneCsr2csr_bufferSizeExt` | |
|
||||
|`cusparseDpruneCsr2csr_bufferSizeExt` | |
|
||||
|`cusparseHpruneCsr2csrNnz` | |
|
||||
|`cusparseSpruneCsr2csrNnz` | |
|
||||
|`cusparseDpruneCsr2csrNnz` | |
|
||||
|`cusparseHpruneDense2csrByPercentage_bufferSizeExt` | |
|
||||
|`cusparseSpruneDense2csrByPercentage_bufferSizeExt` | |
|
||||
|`cusparseDpruneDense2csrByPercentage_bufferSizeExt` | |
|
||||
|`cusparseHpruneDense2csrNnzByPercentage` | |
|
||||
|`cusparseSpruneDense2csrNnzByPercentage` | |
|
||||
|`cusparseDpruneDense2csrNnzByPercentage` | |
|
||||
|`cusparseHpruneCsr2csrByPercentage_bufferSizeExt` | |
|
||||
|`cusparseSpruneCsr2csrByPercentage_bufferSizeExt` | |
|
||||
|`cusparseDpruneCsr2csrByPercentage_bufferSizeExt` | |
|
||||
|`cusparseHpruneCsr2csrNnzByPercentage` | |
|
||||
|`cusparseSpruneCsr2csrNnzByPercentage` | |
|
||||
|`cusparseDpruneCsr2csrNnzByPercentage` | |
|
||||
|`cusparseSnnz_compress` | |
|
||||
|`cusparseDnnz_compress` | |
|
||||
|`cusparseCnnz_compress` | |
|
||||
|`cusparseZnnz_compress` | |
|
||||
|
||||
@@ -245,6 +245,7 @@ const std::map<llvm::StringRef, hipCounter> CUDA_SPARSE_FUNCTION_MAP{
|
||||
{"cusparseScsrgeam", {"hipsparseScsrgeam", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDcsrgeam", {"hipsparseDcsrgeam", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseCcsrgeam", {"hipsparseCcsrgeam", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZcsrgeam", {"hipsparseZcsrgeam", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseScsrgeam2_bufferSizeExt", {"hipsparseScsrgeam2_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDcsrgeam2_bufferSizeExt", {"hipsparseDcsrgeam2_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
@@ -255,6 +256,7 @@ const std::map<llvm::StringRef, hipCounter> CUDA_SPARSE_FUNCTION_MAP{
|
||||
{"cusparseScsrgemm", {"hipsparseScsrgemm", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDcsrgemm", {"hipsparseDcsrgemm", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseCcsrgemm", {"hipsparseCcsrgemm", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZcsrgemm", {"hipsparseZcsrgemm", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseScsrgemm2_bufferSizeExt", {"hipsparseScsrgemm2_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDcsrgemm2_bufferSizeExt", {"hipsparseDcsrgemm2_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
@@ -416,4 +418,192 @@ const std::map<llvm::StringRef, hipCounter> CUDA_SPARSE_FUNCTION_MAP{
|
||||
{"cusparseDgpsvInterleavedBatch", {"hipsparseDgpsvInterleavedBatch", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseCgpsvInterleavedBatch", {"hipsparseCgpsvInterleavedBatch", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZgpsvInterleavedBatch", {"hipsparseZgpsvInterleavedBatch", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
// 11. cuSPARSE Matrix Reorderings Reference
|
||||
{"cusparseScsrcolor", {"hipsparseScsrcolor", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDcsrcolor", {"hipsparseDcsrcolor", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseCcsrcolor", {"hipsparseCcsrcolor", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZcsrcolor", {"hipsparseZcsrcolor", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
// 12. cuSPARSE Format Conversion Reference
|
||||
{"cusparseSbsr2csr", {"hipsparseSbsr2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDbsr2csr", {"hipsparseDbsr2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseCbsr2csr", {"hipsparseCbsr2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZbsr2csr", {"hipsparseZbsr2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseSgebsr2gebsc_bufferSize", {"hipsparseSgebsr2gebsc_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDgebsr2gebsc_bufferSize", {"hipsparseDgebsr2gebsc_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseCgebsr2gebsc_bufferSize", {"hipsparseCgebsr2gebsc_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZgebsr2gebsc_bufferSize", {"hipsparseZgebsr2gebsc_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseSgebsr2gebsc", {"hipsparseSgebsr2gebsc", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDgebsr2gebsc", {"hipsparseDgebsr2gebsc", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseCgebsr2gebsc", {"hipsparseCgebsr2gebsc", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZgebsr2gebsc", {"hipsparseZgebsr2gebsc", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseSgebsr2gebsr_bufferSize", {"hipsparseSgebsr2gebsr_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDgebsr2gebsr_bufferSize", {"hipsparseDgebsr2gebsr_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseCgebsr2gebsr_bufferSize", {"hipsparseCgebsr2gebsr_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZgebsr2gebsr_bufferSize", {"hipsparseZgebsr2gebsr_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseSgebsr2csr", {"hipsparseSgebsr2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDgebsr2csr", {"hipsparseDgebsr2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseCgebsr2csr", {"hipsparseCgebsr2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZgebsr2csr", {"hipsparseZgebsr2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseXgebsr2gebsrNnz", {"hipsparseXgebsr2gebsrNnz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseSgebsr2gebsr", {"hipsparseSgebsr2gebsr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDgebsr2gebsr", {"hipsparseDgebsr2gebsr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseCgebsr2gebsr", {"hipsparseCgebsr2gebsr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZgebsr2gebsr", {"hipsparseZgebsr2gebsr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseScsr2gebsr_bufferSize", {"hipsparseScsr2gebsr_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDcsr2gebsr_bufferSize", {"hipsparseDcsr2gebsr_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseCcsr2gebsr_bufferSize", {"hipsparseCcsr2gebsr_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZcsr2gebsr_bufferSize", {"hipsparseZcsr2gebsr_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseXcsr2gebsrNnz", {"hipsparseXcsr2gebsrNnz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseScsr2gebsr", {"hipsparseScsr2gebsr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDcsr2gebsr", {"hipsparseDcsr2gebsr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseCcsr2gebsr", {"hipsparseCcsr2gebsr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZcsr2gebsr", {"hipsparseZcsr2gebsr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseXcoo2csr", {"hipsparseXcoo2csr", CONV_LIB_FUNC, API_SPARSE}},
|
||||
|
||||
{"cusparseScsc2dense", {"hipsparseScsc2dense", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDcsc2dense", {"hipsparseDcsc2dense", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseCcsc2dense", {"hipsparseCcsc2dense", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZcsc2dense", {"hipsparseZcsc2dense", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseScsc2hyb", {"hipsparseScsc2hyb", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDcsc2hyb", {"hipsparseDcsc2hyb", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseCcsc2hyb", {"hipsparseCcsc2hyb", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZcsc2hyb", {"hipsparseZcsc2hyb", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseXcsr2bsrNnz", {"hipsparseXcsr2bsrNnz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseScsr2bsr", {"hipsparseScsr2bsr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDcsr2bsr", {"hipsparseDcsr2bsr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseCcsr2bsr", {"hipsparseCcsr2bsr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZcsr2bsr", {"hipsparseZcsr2bsr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseXcsr2coo", {"hipsparseXcsr2coo", CONV_LIB_FUNC, API_SPARSE}},
|
||||
|
||||
{"cusparseScsr2csc", {"hipsparseScsr2csc", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseDcsr2csc", {"hipsparseDcsr2csc", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseCcsr2csc", {"hipsparseCcsr2csc", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZcsr2csc", {"hipsparseZcsr2csc", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseCsr2cscEx", {"hipsparseCsr2cscEx", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseScsr2dense", {"hipsparseScsr2dense", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDcsr2dense", {"hipsparseDcsr2dense", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseCcsr2dense", {"hipsparseCcsr2dense", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZcsr2dense", {"hipsparseZcsr2dense", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseScsr2csr_compress", {"hipsparseScsr2csr_compress", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDcsr2csr_compress", {"hipsparseDcsr2csr_compress", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDcsr2csr_compress", {"hipsparseDcsr2csr_compress", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZcsr2csr_compress", {"hipsparseZcsr2csr_compress", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseScsr2hyb", {"hipsparseScsr2hyb", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseDcsr2hyb", {"hipsparseDcsr2hyb", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseCcsr2hyb", {"hipsparseCcsr2hyb", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZcsr2hyb", {"hipsparseZcsr2hyb", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseSdense2csc", {"hipsparseSdense2csc", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDdense2csc", {"hipsparseDdense2csc", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseCdense2csc", {"hipsparseCdense2csc", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZdense2csc", {"hipsparseZdense2csc", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseSdense2csr", {"hipsparseSdense2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDdense2csr", {"hipsparseDdense2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseCdense2csr", {"hipsparseCdense2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZdense2csr", {"hipsparseZdense2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseSdense2hyb", {"hipsparseSdense2hyb", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDdense2hyb", {"hipsparseDdense2hyb", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseCdense2hyb", {"hipsparseCdense2hyb", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZdense2hyb", {"hipsparseZdense2hyb", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseShyb2csc", {"hipsparseShyb2csc", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDhyb2csc", {"hipsparseDhyb2csc", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseChyb2csc", {"hipsparseChyb2csc", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZhyb2csc", {"hipsparseZhyb2csc", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseShyb2csr", {"hipsparseShyb2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDhyb2csr", {"hipsparseDhyb2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseChyb2csr", {"hipsparseChyb2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZhyb2csr", {"hipsparseZhyb2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseShyb2dense", {"hipsparseShyb2dense", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDhyb2dense", {"hipsparseDhyb2dense", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseChyb2dense", {"hipsparseChyb2dense", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZhyb2dense", {"hipsparseZhyb2dense", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseSnnz", {"hipsparseSnnz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDnnz", {"hipsparseDnnz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseCnnz", {"hipsparseCnnz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZnnz", {"hipsparseZnnz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseCreateIdentityPermutation", {"hipsparseCreateIdentityPermutation", CONV_LIB_FUNC, API_SPARSE}},
|
||||
|
||||
{"cusparseXcoosort_bufferSizeExt", {"hipsparseXcoosort_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseXcoosortByRow", {"hipsparseXcoosortByRow", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseXcoosortByColumn", {"hipsparseXcoosortByColumn", CONV_LIB_FUNC, API_SPARSE}},
|
||||
|
||||
{"cusparseXcsrsort_bufferSizeExt", {"hipsparseXcsrsort_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseXcsrsort", {"hipsparseXcsrsort", CONV_LIB_FUNC, API_SPARSE}},
|
||||
|
||||
{"cusparseScusparseXcscsort_bufferSizeExtnnz", {"hipsparseXcscsort_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseXcscsort", {"hipsparseXcscsort", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseCreateCsru2csrInfo", {"hipsparseCreateCsru2csrInfo", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDestroyCsru2csrInfo", {"hipsparseDestroyCsru2csrInfo", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseScsru2csr_bufferSizeExt", {"hipsparseScsru2csr_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDcsru2csr_bufferSizeExt", {"hipsparseDcsru2csr_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseCcsru2csr_bufferSizeExt", {"hipsparseCcsru2csr_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZcsru2csr_bufferSizeExt", {"hipsparseZcsru2csr_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseScsru2csr", {"hipsparseScsru2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDcsru2csr", {"hipsparseDcsru2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseCcsru2csr", {"hipsparseCcsru2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZcsru2csr", {"hipsparseZcsru2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseHpruneDense2csr_bufferSizeExt", {"hipsparseHpruneDense2csr_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseSpruneDense2csr_bufferSizeExt", {"hipsparseSpruneDense2csr_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDpruneDense2csr_bufferSizeExt", {"hipsparseDpruneDense2csr_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseHpruneDense2csrNnz", {"hipsparseHpruneDense2csrNnz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseSpruneDense2csrNnz", {"hipsparseSpruneDense2csrNnz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDpruneDense2csrNnz", {"hipsparseDpruneDense2csrNnz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseHpruneCsr2csr_bufferSizeExt", {"hipsparseHpruneCsr2csr_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseSpruneCsr2csr_bufferSizeExt", {"hipsparseSpruneCsr2csr_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDpruneCsr2csr_bufferSizeExt", {"hipsparseDpruneCsr2csr_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseHpruneCsr2csrNnz", {"hipsparseHpruneCsr2csrNnz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseSpruneCsr2csrNnz", {"hipsparseSpruneCsr2csrNnz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDpruneCsr2csrNnz", {"hipsparseDpruneCsr2csrNnz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseHpruneDense2csrByPercentage_bufferSizeExt", {"hipsparseHpruneDense2csrByPercentage_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseSpruneDense2csrByPercentage_bufferSizeExt", {"hipsparseSpruneDense2csrByPercentage_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDpruneDense2csrByPercentage_bufferSizeExt", {"hipsparseDpruneDense2csrByPercentage_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseHpruneDense2csrNnzByPercentage", {"hipsparseHpruneDense2csrNnzByPercentage", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseSpruneDense2csrNnzByPercentage", {"hipsparseSpruneDense2csrNnzByPercentage", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDpruneDense2csrNnzByPercentage", {"hipsparseDpruneDense2csrNnzByPercentage", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseHpruneCsr2csrByPercentage_bufferSizeExt", {"hipsparseHpruneCsr2csrByPercentage_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseSpruneCsr2csrByPercentage_bufferSizeExt", {"hipsparseSpruneCsr2csrByPercentage_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDpruneCsr2csrByPercentage_bufferSizeExt", {"hipsparseDpruneCsr2csrByPercentage_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseHpruneCsr2csrNnzByPercentage", {"hipsparseHpruneCsr2csrNnzByPercentage", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseSpruneCsr2csrNnzByPercentage", {"hipsparseSpruneCsr2csrNnzByPercentage", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDpruneCsr2csrNnzByPercentage", {"hipsparseDpruneCsr2csrNnzByPercentage", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseSnnz_compress", {"hipsparseSnnz_compress", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDnnz_compress", {"hipsparseDnnz_compress", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseCnnz_compress", {"hipsparseCnnz_compress", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZnnz_compress", {"hipsparseZnnz_compress", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
};
|
||||
|
||||
@@ -0,0 +1,413 @@
|
||||
// RUN: %run_test hipify "%s" "%t" %cuda_args
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
// CHECK: #include <hip/hip_runtime.h>
|
||||
#include <cuda_runtime.h>
|
||||
// CHECK: #include <hipsparse.h>
|
||||
#include <cusparse.h>
|
||||
// CHECK: #include <hipblas.h>
|
||||
#include <cublas_v2.h>
|
||||
|
||||
// NOTE: CUDA 10.0
|
||||
|
||||
/*
|
||||
* compute | b - A*x|_inf
|
||||
*/
|
||||
void residaul_eval(
|
||||
int n,
|
||||
const float *dl,
|
||||
const float *d,
|
||||
const float *du,
|
||||
const float *b,
|
||||
const float *x,
|
||||
float *r_nrminf_ptr)
|
||||
{
|
||||
float r_nrminf = 0;
|
||||
for (int i = 0; i < n; i++) {
|
||||
float dot = 0;
|
||||
if (i > 0) {
|
||||
dot += dl[i] * x[i - 1];
|
||||
}
|
||||
dot += d[i] * x[i];
|
||||
if (i < (n - 1)) {
|
||||
dot += du[i] * x[i + 1];
|
||||
}
|
||||
float ri = b[i] - dot;
|
||||
r_nrminf = (r_nrminf > fabs(ri)) ? r_nrminf : fabs(ri);
|
||||
}
|
||||
|
||||
*r_nrminf_ptr = r_nrminf;
|
||||
}
|
||||
|
||||
int main(int argc, char*argv[])
|
||||
{
|
||||
// CHECK: hipsparseHandle_t cusparseH = NULL;
|
||||
cusparseHandle_t cusparseH = NULL;
|
||||
// CHECK: hipblasHandle_t cublasH = NULL;
|
||||
cublasHandle_t cublasH = NULL;
|
||||
// CHECK: hipStream_t stream = NULL;
|
||||
cudaStream_t stream = NULL;
|
||||
// CHECK: hipsparseStatus_t status = HIPSPARSE_STATUS_SUCCESS;
|
||||
cusparseStatus_t status = CUSPARSE_STATUS_SUCCESS;
|
||||
// CHECK: hipblasStatus_t cublasStat = HIPBLAS_STATUS_SUCCESS;
|
||||
cublasStatus_t cublasStat = CUBLAS_STATUS_SUCCESS;
|
||||
// CHECK: hipError_t cudaStat1 = hipSuccess;
|
||||
cudaError_t cudaStat1 = cudaSuccess;
|
||||
|
||||
const int n = 3;
|
||||
const int batchSize = 2;
|
||||
/*
|
||||
* | 1 6 0 | | 1 | | -0.603960 |
|
||||
* A1 =| 4 2 7 |, b1 = | 2 |, x1 = | 0.267327 |
|
||||
* | 0 5 3 | | 3 | | 0.554455 |
|
||||
*
|
||||
* | 8 13 0 | | 4 | | -0.063291 |
|
||||
* A2 =| 11 9 14 |, b2 = | 5 |, x2 = | 0.346641 |
|
||||
* | 0 12 10 | | 6 | | 0.184031 |
|
||||
*/
|
||||
|
||||
/*
|
||||
* A = (dl, d, du), B and X are in aggregate format
|
||||
*/
|
||||
const float dl[n * batchSize] = { 0, 4, 5, 0, 11, 12 };
|
||||
const float d[n * batchSize] = { 1, 2, 3, 8, 9, 10 };
|
||||
const float du[n * batchSize] = { 6, 7, 0, 13, 14, 0 };
|
||||
const float B[n * batchSize] = { 1, 2, 3, 4, 5, 6 };
|
||||
float X[n * batchSize]; /* Xj = Aj \ Bj */
|
||||
|
||||
/* device memory
|
||||
* (d_dl0, d_d0, d_du0) is aggregate format
|
||||
* (d_dl, d_d, d_du) is interleaved format
|
||||
*/
|
||||
float *d_dl0 = NULL;
|
||||
float *d_d0 = NULL;
|
||||
float *d_du0 = NULL;
|
||||
float *d_dl = NULL;
|
||||
float *d_d = NULL;
|
||||
float *d_du = NULL;
|
||||
float *d_B = NULL;
|
||||
float *d_X = NULL;
|
||||
|
||||
size_t lworkInBytes = 0;
|
||||
char *d_work = NULL;
|
||||
|
||||
/*
|
||||
* algo = 0: cuThomas (unstable)
|
||||
* algo = 1: LU with pivoting (stable)
|
||||
* algo = 2: QR (stable)
|
||||
*/
|
||||
const int algo = 2;
|
||||
|
||||
const float h_one = 1;
|
||||
const float h_zero = 0;
|
||||
|
||||
printf("example of gtsv (interleaved format) \n");
|
||||
printf("choose algo = 0,1,2 to select different algorithms \n");
|
||||
printf("n = %d, batchSize = %d, algo = %d \n", n, batchSize, algo);
|
||||
|
||||
/* step 1: create cusparse/cublas handle, bind a stream */
|
||||
// CHECK: cudaStat1 = hipStreamCreateWithFlags(&stream, hipStreamNonBlocking);
|
||||
cudaStat1 = cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: status = hipsparseCreate(&cusparseH);
|
||||
status = cusparseCreate(&cusparseH);
|
||||
//CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status);
|
||||
assert(CUSPARSE_STATUS_SUCCESS == status);
|
||||
// CHECK: status = hipsparseSetStream(cusparseH, stream);
|
||||
status = cusparseSetStream(cusparseH, stream);
|
||||
//CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status);
|
||||
assert(CUSPARSE_STATUS_SUCCESS == status);
|
||||
// CHECK: cublasStat = hipblasCreate(&cublasH);
|
||||
cublasStat = cublasCreate(&cublasH);
|
||||
// CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat);
|
||||
assert(CUBLAS_STATUS_SUCCESS == cublasStat);
|
||||
// CHECK: cublasStat = hipblasSetStream(cublasH, stream);
|
||||
cublasStat = cublasSetStream(cublasH, stream);
|
||||
// CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat);
|
||||
assert(CUBLAS_STATUS_SUCCESS == cublasStat);
|
||||
|
||||
/* step 2: allocate device memory */
|
||||
// CHECK: cudaStat1 = hipMalloc((void**)&d_dl0, sizeof(float)*n*batchSize);
|
||||
cudaStat1 = cudaMalloc((void**)&d_dl0, sizeof(float)*n*batchSize);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMalloc((void**)&d_d0, sizeof(float)*n*batchSize);
|
||||
cudaStat1 = cudaMalloc((void**)&d_d0, sizeof(float)*n*batchSize);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMalloc((void**)&d_du0, sizeof(float)*n*batchSize);
|
||||
cudaStat1 = cudaMalloc((void**)&d_du0, sizeof(float)*n*batchSize);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMalloc((void**)&d_dl, sizeof(float)*n*batchSize);
|
||||
cudaStat1 = cudaMalloc((void**)&d_dl, sizeof(float)*n*batchSize);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMalloc((void**)&d_d, sizeof(float)*n*batchSize);
|
||||
cudaStat1 = cudaMalloc((void**)&d_d, sizeof(float)*n*batchSize);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMalloc((void**)&d_du, sizeof(float)*n*batchSize);
|
||||
cudaStat1 = cudaMalloc((void**)&d_du, sizeof(float)*n*batchSize);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMalloc((void**)&d_B, sizeof(float)*n*batchSize);
|
||||
cudaStat1 = cudaMalloc((void**)&d_B, sizeof(float)*n*batchSize);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMalloc((void**)&d_X, sizeof(float)*n*batchSize);
|
||||
cudaStat1 = cudaMalloc((void**)&d_X, sizeof(float)*n*batchSize);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
|
||||
/* step 3: prepare data in device, interleaved format */
|
||||
// CHECK: cudaStat1 = hipMemcpy(d_dl0, dl, sizeof(float)*n*batchSize, hipMemcpyHostToDevice);
|
||||
cudaStat1 = cudaMemcpy(d_dl0, dl, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMemcpy(d_d0, d, sizeof(float)*n*batchSize, hipMemcpyHostToDevice);
|
||||
cudaStat1 = cudaMemcpy(d_d0, d, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMemcpy(d_du0, du, sizeof(float)*n*batchSize, hipMemcpyHostToDevice);
|
||||
cudaStat1 = cudaMemcpy(d_du0, du, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMemcpy(d_B, B, sizeof(float)*n*batchSize, hipMemcpyHostToDevice);
|
||||
cudaStat1 = cudaMemcpy(d_B, B, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: hipDeviceSynchronize();
|
||||
cudaDeviceSynchronize();
|
||||
/* convert dl to interleaved format
|
||||
* dl = transpose(dl0)
|
||||
*/
|
||||
// CHECK: cublasStat = hipblasSgeam(
|
||||
// CHECK: HIPBLAS_OP_T,
|
||||
// CHECK: HIPBLAS_OP_T,
|
||||
cublasStat = cublasSgeam(
|
||||
cublasH,
|
||||
CUBLAS_OP_T, /* transa */
|
||||
CUBLAS_OP_T, /* transb, don't care */
|
||||
batchSize, /* number of rows of dl */
|
||||
n, /* number of columns of dl */
|
||||
&h_one,
|
||||
d_dl0, /* dl0 is n-by-batchSize */
|
||||
n, /* leading dimension of dl0 */
|
||||
&h_zero,
|
||||
NULL,
|
||||
n, /* don't cae */
|
||||
d_dl, /* dl is batchSize-by-n */
|
||||
batchSize /* leading dimension of dl */
|
||||
);
|
||||
// CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat);
|
||||
assert(CUBLAS_STATUS_SUCCESS == cublasStat);
|
||||
/* convert d to interleaved format
|
||||
* d = transpose(d0)
|
||||
*/
|
||||
// CHECK: cublasStat = hipblasSgeam(
|
||||
// CHECK: HIPBLAS_OP_T,
|
||||
// CHECK: HIPBLAS_OP_T,
|
||||
cublasStat = cublasSgeam(
|
||||
cublasH,
|
||||
CUBLAS_OP_T, /* transa */
|
||||
CUBLAS_OP_T, /* transb, don't care */
|
||||
batchSize, /* number of rows of d */
|
||||
n, /* number of columns of d */
|
||||
&h_one,
|
||||
d_d0, /* d0 is n-by-batchSize */
|
||||
n, /* leading dimension of d0 */
|
||||
&h_zero,
|
||||
NULL,
|
||||
n, /* don't cae */
|
||||
d_d, /* d is batchSize-by-n */
|
||||
batchSize /* leading dimension of d */
|
||||
);
|
||||
// CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat);
|
||||
assert(CUBLAS_STATUS_SUCCESS == cublasStat);
|
||||
|
||||
/* convert du to interleaved format
|
||||
* du = transpose(du0)
|
||||
*/
|
||||
// CHECK: cublasStat = hipblasSgeam(
|
||||
// CHECK: HIPBLAS_OP_T,
|
||||
// CHECK: HIPBLAS_OP_T,
|
||||
cublasStat = cublasSgeam(
|
||||
cublasH,
|
||||
CUBLAS_OP_T, /* transa */
|
||||
CUBLAS_OP_T, /* transb, don't care */
|
||||
batchSize, /* number of rows of du */
|
||||
n, /* number of columns of du */
|
||||
&h_one,
|
||||
d_du0, /* du0 is n-by-batchSize */
|
||||
n, /* leading dimension of du0 */
|
||||
&h_zero,
|
||||
NULL,
|
||||
n, /* don't cae */
|
||||
d_du, /* du is batchSize-by-n */
|
||||
batchSize /* leading dimension of du */
|
||||
);
|
||||
// CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat);
|
||||
assert(CUBLAS_STATUS_SUCCESS == cublasStat);
|
||||
|
||||
/* convert B to interleaved format
|
||||
* X = transpose(B)
|
||||
*/
|
||||
// CHECK: cublasStat = hipblasSgeam(
|
||||
// CHECK: HIPBLAS_OP_T,
|
||||
// CHECK: HIPBLAS_OP_T,
|
||||
cublasStat = cublasSgeam(
|
||||
cublasH,
|
||||
CUBLAS_OP_T, /* transa */
|
||||
CUBLAS_OP_T, /* transb, don't care */
|
||||
batchSize, /* number of rows of X */
|
||||
n, /* number of columns of X */
|
||||
&h_one,
|
||||
d_B, /* B is n-by-batchSize */
|
||||
n, /* leading dimension of B */
|
||||
&h_zero,
|
||||
NULL,
|
||||
n, /* don't cae */
|
||||
d_X, /* X is batchSize-by-n */
|
||||
batchSize /* leading dimension of X */
|
||||
);
|
||||
// CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat);
|
||||
assert(CUBLAS_STATUS_SUCCESS == cublasStat);
|
||||
/* step 4: prepare workspace */
|
||||
// NOTE: CUDA 10.0
|
||||
// CHECK: status = hipsparseSgtsvInterleavedBatch_bufferSizeExt(
|
||||
status = cusparseSgtsvInterleavedBatch_bufferSizeExt(
|
||||
cusparseH,
|
||||
algo,
|
||||
n,
|
||||
d_dl,
|
||||
d_d,
|
||||
d_du,
|
||||
d_X,
|
||||
batchSize,
|
||||
&lworkInBytes);
|
||||
// CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status);
|
||||
assert(CUSPARSE_STATUS_SUCCESS == status);
|
||||
|
||||
printf("lworkInBytes = %lld \n", (long long)lworkInBytes);
|
||||
// CHECK: cudaStat1 = hipMalloc((void**)&d_work, lworkInBytes);
|
||||
cudaStat1 = cudaMalloc((void**)&d_work, lworkInBytes);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
|
||||
/* step 5: solve Aj*xj = bj */
|
||||
// NOTE: CUDA 10.0
|
||||
// CHECK: status = hipsparseSgtsvInterleavedBatch(
|
||||
status = cusparseSgtsvInterleavedBatch(
|
||||
cusparseH,
|
||||
algo,
|
||||
n,
|
||||
d_dl,
|
||||
d_d,
|
||||
d_du,
|
||||
d_X,
|
||||
batchSize,
|
||||
d_work);
|
||||
// CHECK: cudaStat1 = hipDeviceSynchronize();
|
||||
cudaStat1 = cudaDeviceSynchronize();
|
||||
// CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status);
|
||||
assert(CUSPARSE_STATUS_SUCCESS == status);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
|
||||
/* step 6: convert X back to aggregate format */
|
||||
/* B = transpose(X) */
|
||||
// CHECK: cublasStat = hipblasSgeam(
|
||||
// CHECK: HIPBLAS_OP_T,
|
||||
// CHECK: HIPBLAS_OP_T,
|
||||
cublasStat = cublasSgeam(
|
||||
cublasH,
|
||||
CUBLAS_OP_T, /* transa */
|
||||
CUBLAS_OP_T, /* transb, don't care */
|
||||
n, /* number of rows of B */
|
||||
batchSize, /* number of columns of B */
|
||||
&h_one,
|
||||
d_X, /* X is batchSize-by-n */
|
||||
batchSize, /* leading dimension of X */
|
||||
&h_zero,
|
||||
NULL,
|
||||
n, /* don't cae */
|
||||
d_B, /* B is n-by-batchSize */
|
||||
n /* leading dimension of B */
|
||||
);
|
||||
// CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat);
|
||||
assert(CUBLAS_STATUS_SUCCESS == cublasStat);
|
||||
// CHECK: hipDeviceSynchronize();
|
||||
cudaDeviceSynchronize();
|
||||
|
||||
/* step 7: residual evaluation */
|
||||
// CHECK: cudaStat1 = hipMemcpy(X, d_B, sizeof(float)*n*batchSize, hipMemcpyDeviceToHost);
|
||||
cudaStat1 = cudaMemcpy(X, d_B, sizeof(float)*n*batchSize, cudaMemcpyDeviceToHost);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: hipDeviceSynchronize();
|
||||
cudaDeviceSynchronize();
|
||||
printf("==== x1 = inv(A1)*b1 \n");
|
||||
for (int j = 0; j < n; j++) {
|
||||
printf("x1[%d] = %f\n", j, X[j]);
|
||||
}
|
||||
|
||||
float r1_nrminf;
|
||||
residaul_eval(
|
||||
n,
|
||||
dl,
|
||||
d,
|
||||
du,
|
||||
B,
|
||||
X,
|
||||
&r1_nrminf
|
||||
);
|
||||
printf("|b1 - A1*x1| = %E\n", r1_nrminf);
|
||||
|
||||
printf("\n==== x2 = inv(A2)*b2 \n");
|
||||
for (int j = 0; j < n; j++) {
|
||||
printf("x2[%d] = %f\n", j, X[n + j]);
|
||||
}
|
||||
|
||||
float r2_nrminf;
|
||||
residaul_eval(
|
||||
n,
|
||||
dl + n,
|
||||
d + n,
|
||||
du + n,
|
||||
B + n,
|
||||
X + n,
|
||||
&r2_nrminf
|
||||
);
|
||||
printf("|b2 - A2*x2| = %E\n", r2_nrminf);
|
||||
|
||||
/* free resources */
|
||||
// CHECK: if (d_dl0) hipFree(d_dl0);
|
||||
if (d_dl0) cudaFree(d_dl0);
|
||||
// CHECK: if (d_d0) hipFree(d_d0);
|
||||
if (d_d0) cudaFree(d_d0);
|
||||
// CHECK: if (d_du0) hipFree(d_du0);
|
||||
if (d_du0) cudaFree(d_du0);
|
||||
// CHECK: if (d_dl) hipFree(d_dl);
|
||||
if (d_dl) cudaFree(d_dl);
|
||||
// CHECK: if (d_d) hipFree(d_d);
|
||||
if (d_d) cudaFree(d_d);
|
||||
// CHECK: if (d_du) hipFree(d_du);
|
||||
if (d_du) cudaFree(d_du);
|
||||
// CHECK: if (d_B) hipFree(d_B);
|
||||
if (d_B) cudaFree(d_B);
|
||||
// CHECK: if (d_X) hipFree(d_X);
|
||||
if (d_X) cudaFree(d_X);
|
||||
// CHECK: if (cusparseH) hipsparseDestroy(cusparseH);
|
||||
if (cusparseH) cusparseDestroy(cusparseH);
|
||||
// CHECK: if (cublasH) hipblasDestroy(cublasH);
|
||||
if (cublasH) cublasDestroy(cublasH);
|
||||
// CHECK: if (stream) hipStreamDestroy(stream);
|
||||
if (stream) cudaStreamDestroy(stream);
|
||||
// CHECK: hipDeviceReset();
|
||||
cudaDeviceReset();
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -23,6 +23,10 @@ config.test_source_root = os.path.dirname(__file__)
|
||||
|
||||
config.excludes = ['cmdparser.hpp']
|
||||
|
||||
config.cuda_version = "@CUDA_VERSION@"
|
||||
if config.cuda_version not in ['10.0']:
|
||||
config.excludes.append('cuSPARSE_08.cu')
|
||||
|
||||
# test_exec_root: The path where tests are located (default is the test suite root).
|
||||
#config.test_exec_root = config.test_source_root
|
||||
|
||||
|
||||
Reference in New Issue
Block a user