Merge pull request #784 from emankov/master

[HIPIFY][SPARSE] Matrix Reorderings and Format Conversion Reference

[ROCm/clr commit: 7de2948334]
This commit is contained in:
Evgeny Mankov
2018-11-30 15:35:14 +03:00
zatwierdzone przez GitHub
4 zmienionych plików z 763 dodań i 0 usunięć
@@ -300,6 +300,7 @@
|`cusparseScsrgeam` | |
|`cusparseDcsrgeam` | |
|`cusparseCcsrgeam` | |
|`cusparseZcsrgeam` | |
|`cusparseScsrgeam2_bufferSizeExt` | |
|`cusparseDcsrgeam2_bufferSizeExt` | |
|`cusparseCcsrgeam2_bufferSizeExt` | |
@@ -308,6 +309,7 @@
|`cusparseScsrgemm` | |
|`cusparseDcsrgemm` | |
|`cusparseCcsrgemm` | |
|`cusparseZcsrgemm` | |
|`cusparseScsrgemm2_bufferSizeExt` | |
|`cusparseDcsrgemm2_bufferSizeExt` | |
|`cusparseCcsrgemm2_bufferSizeExt` | |
@@ -460,3 +462,157 @@
|`cusparseDgpsvInterleavedBatch` | |
|`cusparseCgpsvInterleavedBatch` | |
|`cusparseZgpsvInterleavedBatch` | |
## **8. cuSPARSE Matrix Reorderings Reference**
| **CUDA** | **HIP** |
|-----------------------------------------------------------|-------------------------------------------------|
|`cusparseScsrcolor` | |
|`cusparseDcsrcolor` | |
|`cusparseCcsrcolor` | |
|`cusparseZcsrcolor` | |
## **9. cuSPARSE Format Conversion Reference**
| **CUDA** | **HIP** |
|-----------------------------------------------------------|-------------------------------------------------|
|`cusparseSbsr2csr` | |
|`cusparseDbsr2csr` | |
|`cusparseCbsr2csr` | |
|`cusparseZbsr2csr` | |
|`cusparseSgebsr2gebsc_bufferSize` | |
|`cusparseDgebsr2gebsc_bufferSize` | |
|`cusparseCgebsr2gebsc_bufferSize` | |
|`cusparseZgebsr2gebsc_bufferSize` | |
|`cusparseSgebsr2gebsc` | |
|`cusparseDgebsr2gebsc` | |
|`cusparseCgebsr2gebsc` | |
|`cusparseZgebsr2gebsc` | |
|`cusparseSgebsr2gebsr_bufferSize` | |
|`cusparseDgebsr2gebsr_bufferSize` | |
|`cusparseCgebsr2gebsr_bufferSize` | |
|`cusparseZgebsr2gebsr_bufferSize` | |
|`cusparseXgebsr2gebsrNnz` | |
|`cusparseSgebsr2gebsr` | |
|`cusparseDgebsr2gebsr` | |
|`cusparseCgebsr2gebsr` | |
|`cusparseZgebsr2gebsr` | |
|`cusparseSgebsr2csr` | |
|`cusparseDgebsr2csr` | |
|`cusparseCgebsr2csr` | |
|`cusparseZgebsr2csr` | |
|`cusparseScsr2gebsr_bufferSize` | |
|`cusparseDcsr2gebsr_bufferSize` | |
|`cusparseCcsr2gebsr_bufferSize` | |
|`cusparseZcsr2gebsr_bufferSize` | |
|`cusparseXcsr2gebsrNnz` | |
|`cusparseScsr2gebsr` | |
|`cusparseDcsr2gebsr` | |
|`cusparseCcsr2gebsr` | |
|`cusparseZcsr2gebsr` | |
|`cusparseXcoo2csr` |`hipsparseXcoo2csr` |
|`cusparseScsc2dense` | |
|`cusparseDcsc2dense` | |
|`cusparseCcsc2dense` | |
|`cusparseZcsc2dense` | |
|`cusparseScsc2hyb` | |
|`cusparseDcsc2hyb` | |
|`cusparseCcsc2hyb` | |
|`cusparseZcsc2hyb` | |
|`cusparseXcsr2bsrNnz` | |
|`cusparseScsr2bsr` | |
|`cusparseDcsr2bsr` | |
|`cusparseCcsr2bsr` | |
|`cusparseZcsr2bsr` | |
|`cusparseXcsr2coo` |`hipsparseXcsr2coo` |
|`cusparseScsr2csc` |`hipsparseScsr2csc` |
|`cusparseDcsr2csc` |`hipsparseDcsr2csc` |
|`cusparseCcsr2csc` | |
|`cusparseZcsr2csc` | |
|`cusparseCsr2cscEx` | |
|`cusparseScsr2dense` | |
|`cusparseDcsr2dense` | |
|`cusparseCcsr2dense` | |
|`cusparseZcsr2dense` | |
|`cusparseScsr2csr_compress` | |
|`cusparseDcsr2csr_compress` | |
|`cusparseCcsr2csr_compress` | |
|`cusparseZcsr2csr_compress` | |
|`cusparseScsr2hyb` |`hipsparseScsr2hyb` |
|`cusparseDcsr2hyb` |`hipsparseDcsr2hyb` |
|`cusparseCcsr2hyb` | |
|`cusparseZcsr2hyb` | |
|`cusparseSdense2csc` | |
|`cusparseDdense2csc` | |
|`cusparseCdense2csc` | |
|`cusparseZdense2csc` | |
|`cusparseSdense2csr` | |
|`cusparseDdense2csr` | |
|`cusparseCdense2csr` | |
|`cusparseZdense2csr` | |
|`cusparseSdense2hyb` | |
|`cusparseDdense2hyb` | |
|`cusparseCdense2hyb` | |
|`cusparseZdense2hyb` | |
|`cusparseShyb2csc` | |
|`cusparseDhyb2csc` | |
|`cusparseChyb2csc` | |
|`cusparseZhyb2csc` | |
|`cusparseShyb2csr` | |
|`cusparseDhyb2csr` | |
|`cusparseChyb2csr` | |
|`cusparseZhyb2csr` | |
|`cusparseShyb2dense` | |
|`cusparseDhyb2dense` | |
|`cusparseChyb2dense` | |
|`cusparseZhyb2dense` | |
|`cusparseSnnz` | |
|`cusparseDnnz` | |
|`cusparseCnnz` | |
|`cusparseZnnz` | |
|`cusparseCreateIdentityPermutation` |`hipsparseCreateIdentityPermutation` |
|`cusparseXcoosort_bufferSizeExt` |`hipsparseXcoosort_bufferSizeExt` |
|`cusparseXcoosortByRow` |`hipsparseXcoosortByRow` |
|`cusparseXcoosortByColumn` |`hipsparseXcoosortByColumn` |
|`cusparseXcsrsort_bufferSizeExt` |`hipsparseXcsrsort_bufferSizeExt` |
|`cusparseXcsrsort` |`hipsparseXcsrsort` |
|`cusparseScusparseXcscsort_bufferSizeExtnnz` | |
|`cusparseXcscsort` | |
|`cusparseCreateCsru2csrInfo` | |
|`cusparseDestroyCsru2csrInfo` | |
|`cusparseScsru2csr_bufferSizeExt` | |
|`cusparseDcsru2csr_bufferSizeExt` | |
|`cusparseCcsru2csr_bufferSizeExt` | |
|`cusparseZcsru2csr_bufferSizeExt` | |
|`cusparseScsru2csr` | |
|`cusparseDcsru2csr` | |
|`cusparseCcsru2csr` | |
|`cusparseZcsru2csr` | |
|`cusparseHpruneDense2csr_bufferSizeExt` | |
|`cusparseSpruneDense2csr_bufferSizeExt` | |
|`cusparseDpruneDense2csr_bufferSizeExt` | |
|`cusparseHpruneDense2csrNnz` | |
|`cusparseSpruneDense2csrNnz` | |
|`cusparseDpruneDense2csrNnz` | |
|`cusparseHpruneCsr2csr_bufferSizeExt` | |
|`cusparseSpruneCsr2csr_bufferSizeExt` | |
|`cusparseDpruneCsr2csr_bufferSizeExt` | |
|`cusparseHpruneCsr2csrNnz` | |
|`cusparseSpruneCsr2csrNnz` | |
|`cusparseDpruneCsr2csrNnz` | |
|`cusparseHpruneDense2csrByPercentage_bufferSizeExt` | |
|`cusparseSpruneDense2csrByPercentage_bufferSizeExt` | |
|`cusparseDpruneDense2csrByPercentage_bufferSizeExt` | |
|`cusparseHpruneDense2csrNnzByPercentage` | |
|`cusparseSpruneDense2csrNnzByPercentage` | |
|`cusparseDpruneDense2csrNnzByPercentage` | |
|`cusparseHpruneCsr2csrByPercentage_bufferSizeExt` | |
|`cusparseSpruneCsr2csrByPercentage_bufferSizeExt` | |
|`cusparseDpruneCsr2csrByPercentage_bufferSizeExt` | |
|`cusparseHpruneCsr2csrNnzByPercentage` | |
|`cusparseSpruneCsr2csrNnzByPercentage` | |
|`cusparseDpruneCsr2csrNnzByPercentage` | |
|`cusparseSnnz_compress` | |
|`cusparseDnnz_compress` | |
|`cusparseCnnz_compress` | |
|`cusparseZnnz_compress` | |
@@ -245,6 +245,7 @@ const std::map<llvm::StringRef, hipCounter> CUDA_SPARSE_FUNCTION_MAP{
{"cusparseScsrgeam", {"hipsparseScsrgeam", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDcsrgeam", {"hipsparseDcsrgeam", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCcsrgeam", {"hipsparseCcsrgeam", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZcsrgeam", {"hipsparseZcsrgeam", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseScsrgeam2_bufferSizeExt", {"hipsparseScsrgeam2_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDcsrgeam2_bufferSizeExt", {"hipsparseDcsrgeam2_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
@@ -255,6 +256,7 @@ const std::map<llvm::StringRef, hipCounter> CUDA_SPARSE_FUNCTION_MAP{
{"cusparseScsrgemm", {"hipsparseScsrgemm", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDcsrgemm", {"hipsparseDcsrgemm", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCcsrgemm", {"hipsparseCcsrgemm", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZcsrgemm", {"hipsparseZcsrgemm", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseScsrgemm2_bufferSizeExt", {"hipsparseScsrgemm2_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDcsrgemm2_bufferSizeExt", {"hipsparseDcsrgemm2_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
@@ -416,4 +418,192 @@ const std::map<llvm::StringRef, hipCounter> CUDA_SPARSE_FUNCTION_MAP{
{"cusparseDgpsvInterleavedBatch", {"hipsparseDgpsvInterleavedBatch", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCgpsvInterleavedBatch", {"hipsparseCgpsvInterleavedBatch", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZgpsvInterleavedBatch", {"hipsparseZgpsvInterleavedBatch", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
// 11. cuSPARSE Matrix Reorderings Reference
{"cusparseScsrcolor", {"hipsparseScsrcolor", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDcsrcolor", {"hipsparseDcsrcolor", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCcsrcolor", {"hipsparseCcsrcolor", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZcsrcolor", {"hipsparseZcsrcolor", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
// 12. cuSPARSE Format Conversion Reference
{"cusparseSbsr2csr", {"hipsparseSbsr2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDbsr2csr", {"hipsparseDbsr2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCbsr2csr", {"hipsparseCbsr2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZbsr2csr", {"hipsparseZbsr2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseSgebsr2gebsc_bufferSize", {"hipsparseSgebsr2gebsc_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDgebsr2gebsc_bufferSize", {"hipsparseDgebsr2gebsc_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCgebsr2gebsc_bufferSize", {"hipsparseCgebsr2gebsc_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZgebsr2gebsc_bufferSize", {"hipsparseZgebsr2gebsc_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseSgebsr2gebsc", {"hipsparseSgebsr2gebsc", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDgebsr2gebsc", {"hipsparseDgebsr2gebsc", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCgebsr2gebsc", {"hipsparseCgebsr2gebsc", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZgebsr2gebsc", {"hipsparseZgebsr2gebsc", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseSgebsr2gebsr_bufferSize", {"hipsparseSgebsr2gebsr_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDgebsr2gebsr_bufferSize", {"hipsparseDgebsr2gebsr_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCgebsr2gebsr_bufferSize", {"hipsparseCgebsr2gebsr_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZgebsr2gebsr_bufferSize", {"hipsparseZgebsr2gebsr_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseSgebsr2csr", {"hipsparseSgebsr2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDgebsr2csr", {"hipsparseDgebsr2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCgebsr2csr", {"hipsparseCgebsr2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZgebsr2csr", {"hipsparseZgebsr2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseXgebsr2gebsrNnz", {"hipsparseXgebsr2gebsrNnz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseSgebsr2gebsr", {"hipsparseSgebsr2gebsr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDgebsr2gebsr", {"hipsparseDgebsr2gebsr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCgebsr2gebsr", {"hipsparseCgebsr2gebsr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZgebsr2gebsr", {"hipsparseZgebsr2gebsr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseScsr2gebsr_bufferSize", {"hipsparseScsr2gebsr_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDcsr2gebsr_bufferSize", {"hipsparseDcsr2gebsr_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCcsr2gebsr_bufferSize", {"hipsparseCcsr2gebsr_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZcsr2gebsr_bufferSize", {"hipsparseZcsr2gebsr_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseXcsr2gebsrNnz", {"hipsparseXcsr2gebsrNnz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseScsr2gebsr", {"hipsparseScsr2gebsr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDcsr2gebsr", {"hipsparseDcsr2gebsr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCcsr2gebsr", {"hipsparseCcsr2gebsr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZcsr2gebsr", {"hipsparseZcsr2gebsr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseXcoo2csr", {"hipsparseXcoo2csr", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseScsc2dense", {"hipsparseScsc2dense", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDcsc2dense", {"hipsparseDcsc2dense", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCcsc2dense", {"hipsparseCcsc2dense", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZcsc2dense", {"hipsparseZcsc2dense", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseScsc2hyb", {"hipsparseScsc2hyb", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDcsc2hyb", {"hipsparseDcsc2hyb", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCcsc2hyb", {"hipsparseCcsc2hyb", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZcsc2hyb", {"hipsparseZcsc2hyb", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseXcsr2bsrNnz", {"hipsparseXcsr2bsrNnz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseScsr2bsr", {"hipsparseScsr2bsr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDcsr2bsr", {"hipsparseDcsr2bsr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCcsr2bsr", {"hipsparseCcsr2bsr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZcsr2bsr", {"hipsparseZcsr2bsr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseXcsr2coo", {"hipsparseXcsr2coo", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseScsr2csc", {"hipsparseScsr2csc", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseDcsr2csc", {"hipsparseDcsr2csc", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseCcsr2csc", {"hipsparseCcsr2csc", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZcsr2csc", {"hipsparseZcsr2csc", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCsr2cscEx", {"hipsparseCsr2cscEx", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseScsr2dense", {"hipsparseScsr2dense", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDcsr2dense", {"hipsparseDcsr2dense", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCcsr2dense", {"hipsparseCcsr2dense", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZcsr2dense", {"hipsparseZcsr2dense", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseScsr2csr_compress", {"hipsparseScsr2csr_compress", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDcsr2csr_compress", {"hipsparseDcsr2csr_compress", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDcsr2csr_compress", {"hipsparseDcsr2csr_compress", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZcsr2csr_compress", {"hipsparseZcsr2csr_compress", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseScsr2hyb", {"hipsparseScsr2hyb", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseDcsr2hyb", {"hipsparseDcsr2hyb", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseCcsr2hyb", {"hipsparseCcsr2hyb", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZcsr2hyb", {"hipsparseZcsr2hyb", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseSdense2csc", {"hipsparseSdense2csc", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDdense2csc", {"hipsparseDdense2csc", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCdense2csc", {"hipsparseCdense2csc", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZdense2csc", {"hipsparseZdense2csc", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseSdense2csr", {"hipsparseSdense2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDdense2csr", {"hipsparseDdense2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCdense2csr", {"hipsparseCdense2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZdense2csr", {"hipsparseZdense2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseSdense2hyb", {"hipsparseSdense2hyb", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDdense2hyb", {"hipsparseDdense2hyb", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCdense2hyb", {"hipsparseCdense2hyb", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZdense2hyb", {"hipsparseZdense2hyb", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseShyb2csc", {"hipsparseShyb2csc", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDhyb2csc", {"hipsparseDhyb2csc", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseChyb2csc", {"hipsparseChyb2csc", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZhyb2csc", {"hipsparseZhyb2csc", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseShyb2csr", {"hipsparseShyb2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDhyb2csr", {"hipsparseDhyb2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseChyb2csr", {"hipsparseChyb2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZhyb2csr", {"hipsparseZhyb2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseShyb2dense", {"hipsparseShyb2dense", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDhyb2dense", {"hipsparseDhyb2dense", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseChyb2dense", {"hipsparseChyb2dense", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZhyb2dense", {"hipsparseZhyb2dense", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseSnnz", {"hipsparseSnnz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDnnz", {"hipsparseDnnz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCnnz", {"hipsparseCnnz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZnnz", {"hipsparseZnnz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCreateIdentityPermutation", {"hipsparseCreateIdentityPermutation", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseXcoosort_bufferSizeExt", {"hipsparseXcoosort_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseXcoosortByRow", {"hipsparseXcoosortByRow", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseXcoosortByColumn", {"hipsparseXcoosortByColumn", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseXcsrsort_bufferSizeExt", {"hipsparseXcsrsort_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseXcsrsort", {"hipsparseXcsrsort", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseScusparseXcscsort_bufferSizeExtnnz", {"hipsparseXcscsort_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseXcscsort", {"hipsparseXcscsort", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCreateCsru2csrInfo", {"hipsparseCreateCsru2csrInfo", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDestroyCsru2csrInfo", {"hipsparseDestroyCsru2csrInfo", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseScsru2csr_bufferSizeExt", {"hipsparseScsru2csr_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDcsru2csr_bufferSizeExt", {"hipsparseDcsru2csr_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCcsru2csr_bufferSizeExt", {"hipsparseCcsru2csr_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZcsru2csr_bufferSizeExt", {"hipsparseZcsru2csr_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseScsru2csr", {"hipsparseScsru2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDcsru2csr", {"hipsparseDcsru2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCcsru2csr", {"hipsparseCcsru2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZcsru2csr", {"hipsparseZcsru2csr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseHpruneDense2csr_bufferSizeExt", {"hipsparseHpruneDense2csr_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseSpruneDense2csr_bufferSizeExt", {"hipsparseSpruneDense2csr_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDpruneDense2csr_bufferSizeExt", {"hipsparseDpruneDense2csr_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseHpruneDense2csrNnz", {"hipsparseHpruneDense2csrNnz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseSpruneDense2csrNnz", {"hipsparseSpruneDense2csrNnz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDpruneDense2csrNnz", {"hipsparseDpruneDense2csrNnz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseHpruneCsr2csr_bufferSizeExt", {"hipsparseHpruneCsr2csr_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseSpruneCsr2csr_bufferSizeExt", {"hipsparseSpruneCsr2csr_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDpruneCsr2csr_bufferSizeExt", {"hipsparseDpruneCsr2csr_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseHpruneCsr2csrNnz", {"hipsparseHpruneCsr2csrNnz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseSpruneCsr2csrNnz", {"hipsparseSpruneCsr2csrNnz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDpruneCsr2csrNnz", {"hipsparseDpruneCsr2csrNnz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseHpruneDense2csrByPercentage_bufferSizeExt", {"hipsparseHpruneDense2csrByPercentage_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseSpruneDense2csrByPercentage_bufferSizeExt", {"hipsparseSpruneDense2csrByPercentage_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDpruneDense2csrByPercentage_bufferSizeExt", {"hipsparseDpruneDense2csrByPercentage_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseHpruneDense2csrNnzByPercentage", {"hipsparseHpruneDense2csrNnzByPercentage", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseSpruneDense2csrNnzByPercentage", {"hipsparseSpruneDense2csrNnzByPercentage", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDpruneDense2csrNnzByPercentage", {"hipsparseDpruneDense2csrNnzByPercentage", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseHpruneCsr2csrByPercentage_bufferSizeExt", {"hipsparseHpruneCsr2csrByPercentage_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseSpruneCsr2csrByPercentage_bufferSizeExt", {"hipsparseSpruneCsr2csrByPercentage_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDpruneCsr2csrByPercentage_bufferSizeExt", {"hipsparseDpruneCsr2csrByPercentage_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseHpruneCsr2csrNnzByPercentage", {"hipsparseHpruneCsr2csrNnzByPercentage", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseSpruneCsr2csrNnzByPercentage", {"hipsparseSpruneCsr2csrNnzByPercentage", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDpruneCsr2csrNnzByPercentage", {"hipsparseDpruneCsr2csrNnzByPercentage", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseSnnz_compress", {"hipsparseSnnz_compress", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDnnz_compress", {"hipsparseDnnz_compress", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCnnz_compress", {"hipsparseCnnz_compress", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZnnz_compress", {"hipsparseZnnz_compress", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
};
@@ -0,0 +1,413 @@
// RUN: %run_test hipify "%s" "%t" %cuda_args
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
// CHECK: #include <hip/hip_runtime.h>
#include <cuda_runtime.h>
// CHECK: #include <hipsparse.h>
#include <cusparse.h>
// CHECK: #include <hipblas.h>
#include <cublas_v2.h>
// NOTE: CUDA 10.0
/*
* compute | b - A*x|_inf
*/
void residaul_eval(
int n,
const float *dl,
const float *d,
const float *du,
const float *b,
const float *x,
float *r_nrminf_ptr)
{
float r_nrminf = 0;
for (int i = 0; i < n; i++) {
float dot = 0;
if (i > 0) {
dot += dl[i] * x[i - 1];
}
dot += d[i] * x[i];
if (i < (n - 1)) {
dot += du[i] * x[i + 1];
}
float ri = b[i] - dot;
r_nrminf = (r_nrminf > fabs(ri)) ? r_nrminf : fabs(ri);
}
*r_nrminf_ptr = r_nrminf;
}
int main(int argc, char*argv[])
{
// CHECK: hipsparseHandle_t cusparseH = NULL;
cusparseHandle_t cusparseH = NULL;
// CHECK: hipblasHandle_t cublasH = NULL;
cublasHandle_t cublasH = NULL;
// CHECK: hipStream_t stream = NULL;
cudaStream_t stream = NULL;
// CHECK: hipsparseStatus_t status = HIPSPARSE_STATUS_SUCCESS;
cusparseStatus_t status = CUSPARSE_STATUS_SUCCESS;
// CHECK: hipblasStatus_t cublasStat = HIPBLAS_STATUS_SUCCESS;
cublasStatus_t cublasStat = CUBLAS_STATUS_SUCCESS;
// CHECK: hipError_t cudaStat1 = hipSuccess;
cudaError_t cudaStat1 = cudaSuccess;
const int n = 3;
const int batchSize = 2;
/*
* | 1 6 0 | | 1 | | -0.603960 |
* A1 =| 4 2 7 |, b1 = | 2 |, x1 = | 0.267327 |
* | 0 5 3 | | 3 | | 0.554455 |
*
* | 8 13 0 | | 4 | | -0.063291 |
* A2 =| 11 9 14 |, b2 = | 5 |, x2 = | 0.346641 |
* | 0 12 10 | | 6 | | 0.184031 |
*/
/*
* A = (dl, d, du), B and X are in aggregate format
*/
const float dl[n * batchSize] = { 0, 4, 5, 0, 11, 12 };
const float d[n * batchSize] = { 1, 2, 3, 8, 9, 10 };
const float du[n * batchSize] = { 6, 7, 0, 13, 14, 0 };
const float B[n * batchSize] = { 1, 2, 3, 4, 5, 6 };
float X[n * batchSize]; /* Xj = Aj \ Bj */
/* device memory
* (d_dl0, d_d0, d_du0) is aggregate format
* (d_dl, d_d, d_du) is interleaved format
*/
float *d_dl0 = NULL;
float *d_d0 = NULL;
float *d_du0 = NULL;
float *d_dl = NULL;
float *d_d = NULL;
float *d_du = NULL;
float *d_B = NULL;
float *d_X = NULL;
size_t lworkInBytes = 0;
char *d_work = NULL;
/*
* algo = 0: cuThomas (unstable)
* algo = 1: LU with pivoting (stable)
* algo = 2: QR (stable)
*/
const int algo = 2;
const float h_one = 1;
const float h_zero = 0;
printf("example of gtsv (interleaved format) \n");
printf("choose algo = 0,1,2 to select different algorithms \n");
printf("n = %d, batchSize = %d, algo = %d \n", n, batchSize, algo);
/* step 1: create cusparse/cublas handle, bind a stream */
// CHECK: cudaStat1 = hipStreamCreateWithFlags(&stream, hipStreamNonBlocking);
cudaStat1 = cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking);
// CHECK: assert(hipSuccess == cudaStat1);
assert(cudaSuccess == cudaStat1);
// CHECK: status = hipsparseCreate(&cusparseH);
status = cusparseCreate(&cusparseH);
//CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status);
assert(CUSPARSE_STATUS_SUCCESS == status);
// CHECK: status = hipsparseSetStream(cusparseH, stream);
status = cusparseSetStream(cusparseH, stream);
//CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status);
assert(CUSPARSE_STATUS_SUCCESS == status);
// CHECK: cublasStat = hipblasCreate(&cublasH);
cublasStat = cublasCreate(&cublasH);
// CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat);
assert(CUBLAS_STATUS_SUCCESS == cublasStat);
// CHECK: cublasStat = hipblasSetStream(cublasH, stream);
cublasStat = cublasSetStream(cublasH, stream);
// CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat);
assert(CUBLAS_STATUS_SUCCESS == cublasStat);
/* step 2: allocate device memory */
// CHECK: cudaStat1 = hipMalloc((void**)&d_dl0, sizeof(float)*n*batchSize);
cudaStat1 = cudaMalloc((void**)&d_dl0, sizeof(float)*n*batchSize);
// CHECK: assert(hipSuccess == cudaStat1);
assert(cudaSuccess == cudaStat1);
// CHECK: cudaStat1 = hipMalloc((void**)&d_d0, sizeof(float)*n*batchSize);
cudaStat1 = cudaMalloc((void**)&d_d0, sizeof(float)*n*batchSize);
// CHECK: assert(hipSuccess == cudaStat1);
assert(cudaSuccess == cudaStat1);
// CHECK: cudaStat1 = hipMalloc((void**)&d_du0, sizeof(float)*n*batchSize);
cudaStat1 = cudaMalloc((void**)&d_du0, sizeof(float)*n*batchSize);
// CHECK: assert(hipSuccess == cudaStat1);
assert(cudaSuccess == cudaStat1);
// CHECK: cudaStat1 = hipMalloc((void**)&d_dl, sizeof(float)*n*batchSize);
cudaStat1 = cudaMalloc((void**)&d_dl, sizeof(float)*n*batchSize);
// CHECK: assert(hipSuccess == cudaStat1);
assert(cudaSuccess == cudaStat1);
// CHECK: cudaStat1 = hipMalloc((void**)&d_d, sizeof(float)*n*batchSize);
cudaStat1 = cudaMalloc((void**)&d_d, sizeof(float)*n*batchSize);
// CHECK: assert(hipSuccess == cudaStat1);
assert(cudaSuccess == cudaStat1);
// CHECK: cudaStat1 = hipMalloc((void**)&d_du, sizeof(float)*n*batchSize);
cudaStat1 = cudaMalloc((void**)&d_du, sizeof(float)*n*batchSize);
// CHECK: assert(hipSuccess == cudaStat1);
assert(cudaSuccess == cudaStat1);
// CHECK: cudaStat1 = hipMalloc((void**)&d_B, sizeof(float)*n*batchSize);
cudaStat1 = cudaMalloc((void**)&d_B, sizeof(float)*n*batchSize);
// CHECK: assert(hipSuccess == cudaStat1);
assert(cudaSuccess == cudaStat1);
// CHECK: cudaStat1 = hipMalloc((void**)&d_X, sizeof(float)*n*batchSize);
cudaStat1 = cudaMalloc((void**)&d_X, sizeof(float)*n*batchSize);
// CHECK: assert(hipSuccess == cudaStat1);
assert(cudaSuccess == cudaStat1);
/* step 3: prepare data in device, interleaved format */
// CHECK: cudaStat1 = hipMemcpy(d_dl0, dl, sizeof(float)*n*batchSize, hipMemcpyHostToDevice);
cudaStat1 = cudaMemcpy(d_dl0, dl, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice);
// CHECK: assert(hipSuccess == cudaStat1);
assert(cudaSuccess == cudaStat1);
// CHECK: cudaStat1 = hipMemcpy(d_d0, d, sizeof(float)*n*batchSize, hipMemcpyHostToDevice);
cudaStat1 = cudaMemcpy(d_d0, d, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice);
// CHECK: assert(hipSuccess == cudaStat1);
assert(cudaSuccess == cudaStat1);
// CHECK: cudaStat1 = hipMemcpy(d_du0, du, sizeof(float)*n*batchSize, hipMemcpyHostToDevice);
cudaStat1 = cudaMemcpy(d_du0, du, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice);
// CHECK: assert(hipSuccess == cudaStat1);
assert(cudaSuccess == cudaStat1);
// CHECK: cudaStat1 = hipMemcpy(d_B, B, sizeof(float)*n*batchSize, hipMemcpyHostToDevice);
cudaStat1 = cudaMemcpy(d_B, B, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice);
// CHECK: assert(hipSuccess == cudaStat1);
assert(cudaSuccess == cudaStat1);
// CHECK: hipDeviceSynchronize();
cudaDeviceSynchronize();
/* convert dl to interleaved format
* dl = transpose(dl0)
*/
// CHECK: cublasStat = hipblasSgeam(
// CHECK: HIPBLAS_OP_T,
// CHECK: HIPBLAS_OP_T,
cublasStat = cublasSgeam(
cublasH,
CUBLAS_OP_T, /* transa */
CUBLAS_OP_T, /* transb, don't care */
batchSize, /* number of rows of dl */
n, /* number of columns of dl */
&h_one,
d_dl0, /* dl0 is n-by-batchSize */
n, /* leading dimension of dl0 */
&h_zero,
NULL,
n, /* don't cae */
d_dl, /* dl is batchSize-by-n */
batchSize /* leading dimension of dl */
);
// CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat);
assert(CUBLAS_STATUS_SUCCESS == cublasStat);
/* convert d to interleaved format
* d = transpose(d0)
*/
// CHECK: cublasStat = hipblasSgeam(
// CHECK: HIPBLAS_OP_T,
// CHECK: HIPBLAS_OP_T,
cublasStat = cublasSgeam(
cublasH,
CUBLAS_OP_T, /* transa */
CUBLAS_OP_T, /* transb, don't care */
batchSize, /* number of rows of d */
n, /* number of columns of d */
&h_one,
d_d0, /* d0 is n-by-batchSize */
n, /* leading dimension of d0 */
&h_zero,
NULL,
n, /* don't cae */
d_d, /* d is batchSize-by-n */
batchSize /* leading dimension of d */
);
// CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat);
assert(CUBLAS_STATUS_SUCCESS == cublasStat);
/* convert du to interleaved format
* du = transpose(du0)
*/
// CHECK: cublasStat = hipblasSgeam(
// CHECK: HIPBLAS_OP_T,
// CHECK: HIPBLAS_OP_T,
cublasStat = cublasSgeam(
cublasH,
CUBLAS_OP_T, /* transa */
CUBLAS_OP_T, /* transb, don't care */
batchSize, /* number of rows of du */
n, /* number of columns of du */
&h_one,
d_du0, /* du0 is n-by-batchSize */
n, /* leading dimension of du0 */
&h_zero,
NULL,
n, /* don't cae */
d_du, /* du is batchSize-by-n */
batchSize /* leading dimension of du */
);
// CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat);
assert(CUBLAS_STATUS_SUCCESS == cublasStat);
/* convert B to interleaved format
* X = transpose(B)
*/
// CHECK: cublasStat = hipblasSgeam(
// CHECK: HIPBLAS_OP_T,
// CHECK: HIPBLAS_OP_T,
cublasStat = cublasSgeam(
cublasH,
CUBLAS_OP_T, /* transa */
CUBLAS_OP_T, /* transb, don't care */
batchSize, /* number of rows of X */
n, /* number of columns of X */
&h_one,
d_B, /* B is n-by-batchSize */
n, /* leading dimension of B */
&h_zero,
NULL,
n, /* don't cae */
d_X, /* X is batchSize-by-n */
batchSize /* leading dimension of X */
);
// CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat);
assert(CUBLAS_STATUS_SUCCESS == cublasStat);
/* step 4: prepare workspace */
// NOTE: CUDA 10.0
// CHECK: status = hipsparseSgtsvInterleavedBatch_bufferSizeExt(
status = cusparseSgtsvInterleavedBatch_bufferSizeExt(
cusparseH,
algo,
n,
d_dl,
d_d,
d_du,
d_X,
batchSize,
&lworkInBytes);
// CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status);
assert(CUSPARSE_STATUS_SUCCESS == status);
printf("lworkInBytes = %lld \n", (long long)lworkInBytes);
// CHECK: cudaStat1 = hipMalloc((void**)&d_work, lworkInBytes);
cudaStat1 = cudaMalloc((void**)&d_work, lworkInBytes);
// CHECK: assert(hipSuccess == cudaStat1);
assert(cudaSuccess == cudaStat1);
/* step 5: solve Aj*xj = bj */
// NOTE: CUDA 10.0
// CHECK: status = hipsparseSgtsvInterleavedBatch(
status = cusparseSgtsvInterleavedBatch(
cusparseH,
algo,
n,
d_dl,
d_d,
d_du,
d_X,
batchSize,
d_work);
// CHECK: cudaStat1 = hipDeviceSynchronize();
cudaStat1 = cudaDeviceSynchronize();
// CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status);
assert(CUSPARSE_STATUS_SUCCESS == status);
// CHECK: assert(hipSuccess == cudaStat1);
assert(cudaSuccess == cudaStat1);
/* step 6: convert X back to aggregate format */
/* B = transpose(X) */
// CHECK: cublasStat = hipblasSgeam(
// CHECK: HIPBLAS_OP_T,
// CHECK: HIPBLAS_OP_T,
cublasStat = cublasSgeam(
cublasH,
CUBLAS_OP_T, /* transa */
CUBLAS_OP_T, /* transb, don't care */
n, /* number of rows of B */
batchSize, /* number of columns of B */
&h_one,
d_X, /* X is batchSize-by-n */
batchSize, /* leading dimension of X */
&h_zero,
NULL,
n, /* don't cae */
d_B, /* B is n-by-batchSize */
n /* leading dimension of B */
);
// CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat);
assert(CUBLAS_STATUS_SUCCESS == cublasStat);
// CHECK: hipDeviceSynchronize();
cudaDeviceSynchronize();
/* step 7: residual evaluation */
// CHECK: cudaStat1 = hipMemcpy(X, d_B, sizeof(float)*n*batchSize, hipMemcpyDeviceToHost);
cudaStat1 = cudaMemcpy(X, d_B, sizeof(float)*n*batchSize, cudaMemcpyDeviceToHost);
// CHECK: assert(hipSuccess == cudaStat1);
assert(cudaSuccess == cudaStat1);
// CHECK: hipDeviceSynchronize();
cudaDeviceSynchronize();
printf("==== x1 = inv(A1)*b1 \n");
for (int j = 0; j < n; j++) {
printf("x1[%d] = %f\n", j, X[j]);
}
float r1_nrminf;
residaul_eval(
n,
dl,
d,
du,
B,
X,
&r1_nrminf
);
printf("|b1 - A1*x1| = %E\n", r1_nrminf);
printf("\n==== x2 = inv(A2)*b2 \n");
for (int j = 0; j < n; j++) {
printf("x2[%d] = %f\n", j, X[n + j]);
}
float r2_nrminf;
residaul_eval(
n,
dl + n,
d + n,
du + n,
B + n,
X + n,
&r2_nrminf
);
printf("|b2 - A2*x2| = %E\n", r2_nrminf);
/* free resources */
// CHECK: if (d_dl0) hipFree(d_dl0);
if (d_dl0) cudaFree(d_dl0);
// CHECK: if (d_d0) hipFree(d_d0);
if (d_d0) cudaFree(d_d0);
// CHECK: if (d_du0) hipFree(d_du0);
if (d_du0) cudaFree(d_du0);
// CHECK: if (d_dl) hipFree(d_dl);
if (d_dl) cudaFree(d_dl);
// CHECK: if (d_d) hipFree(d_d);
if (d_d) cudaFree(d_d);
// CHECK: if (d_du) hipFree(d_du);
if (d_du) cudaFree(d_du);
// CHECK: if (d_B) hipFree(d_B);
if (d_B) cudaFree(d_B);
// CHECK: if (d_X) hipFree(d_X);
if (d_X) cudaFree(d_X);
// CHECK: if (cusparseH) hipsparseDestroy(cusparseH);
if (cusparseH) cusparseDestroy(cusparseH);
// CHECK: if (cublasH) hipblasDestroy(cublasH);
if (cublasH) cublasDestroy(cublasH);
// CHECK: if (stream) hipStreamDestroy(stream);
if (stream) cudaStreamDestroy(stream);
// CHECK: hipDeviceReset();
cudaDeviceReset();
return 0;
}
@@ -23,6 +23,10 @@ config.test_source_root = os.path.dirname(__file__)
config.excludes = ['cmdparser.hpp']
config.cuda_version = "@CUDA_VERSION@"
if config.cuda_version not in ['10.0']:
config.excludes.append('cuSPARSE_08.cu')
# test_exec_root: The path where tests are located (default is the test suite root).
#config.test_exec_root = config.test_source_root