Merge pull request #775 from emankov/master

[HIPIFY][SPARSE] Helper and Level 1,2 functions

[ROCm/hip commit: dc5aea4b03]
Bu işleme şunda yer alıyor:
Evgeny Mankov
2018-11-26 15:14:40 +03:00
işlemeyi yapan: GitHub
işleme 3cedd8a783
5 değiştirilmiş dosya ile 974 ekleme ve 2 silme
+150 -1
Dosyayı Görüntüle
@@ -83,7 +83,156 @@
| struct |`pruneInfo` | |
| typedef |`pruneInfo_t` | |
## **2. CUSPARSE API functions**
## **2.cuSPARSE Helper Function Reference**
| **CUDA** | **HIP** |
|-----------------------------------------------------------|-------------------------------------------------|
|`cusparseCreate` |`hipsparseCreate` |
|`cusparseCreateSolveAnalysisInfo` | |
|`cusparseCreateHybMat` |`hipsparseCreateHybMat` |
|`cusparseCreateMatDescr` |`hipsparseCreateMatDescr` |
|`cusparseDestroy` |`hipsparseDestroy` |
|`cusparseDestroySolveAnalysisInfo` | |
|`cusparseDestroyHybMat` |`hipsparseDestroyHybMat` |
|`cusparseDestroyMatDescr` |`hipsparseDestroyMatDescr` |
|`cusparseGetLevelInfo` | |
|`cusparseGetMatDiagType` |`hipsparseGetMatDiagType` |
|`cusparseGetMatFillMode` |`hipsparseGetMatFillMode` |
|`cusparseGetMatIndexBase` |`hipsparseGetMatIndexBase` |
|`cusparseGetMatType` |`hipsparseGetMatType` |
|`cusparseGetPointerMode` |`hipsparseGetPointerMode` |
|`cusparseGetVersion` |`hipsparseGetVersion` |
|`cusparseSetMatDiagType` |`hipsparseSetMatDiagType` |
|`cusparseSetMatFillMode` |`hipsparseSetMatFillMode` |
|`cusparseSetMatType` |`hipsparseSetMatType` |
|`cusparseSetPointerMode` |`hipsparseSetPointerMode` |
|`cusparseSetStream` |`hipsparseSetStream` |
|`cusparseGetStream` |`hipsparseGetStream` |
|`cusparseCreateCsrsv2Info` |`hipsparseCreateCsrsv2Info` |
|`cusparseDestroyCsrsv2Info` |`hipsparseDestroyCsrsv2Info` |
|`cusparseCreateCsrsm2Info` | |
|`cusparseDestroyCsrsm2Info` | |
|`cusparseCreateCsric02Info` | |
|`cusparseDestroyCsric02Info` | |
|`cusparseCreateCsrilu02Info` |`hipsparseCreateCsrilu02Info` |
|`cusparseDestroyCsrilu02Info` |`hipsparseDestroyCsrilu02Info` |
|`cusparseCreateBsrsv2Info` | |
|`cusparseDestroyBsrsv2Info` | |
|`cusparseCreateBsrsm2Info` | |
|`cusparseDestroyBsrsm2Info` | |
|`cusparseCreateBsric02Info` | |
|`cusparseDestroyBsric02Info` | |
|`cusparseCreateBsrilu02Info` | |
|`cusparseDestroyBsrilu02Info` | |
|`cusparseCreateCsrgemm2Info` | |
|`cusparseDestroyCsrgemm2Info` | |
|`cusparseCreatePruneInfo` | |
|`cusparseDestroyPruneInfo` | |
## **3.cuSPARSE Level 1 Function Reference**
| **CUDA** | **HIP** |
|-----------------------------------------------------------|-------------------------------------------------|
|`cusparseSaxpyi` |`hipsparseSaxpyi` |
|`cusparseDaxpyi` |`hipsparseDaxpyi` |
|`cusparseCaxpyi` | |
|`cusparseZaxpyi` | |
|`cusparseSdoti` |`hipsparseSdoti` |
|`cusparseDdoti` |`hipsparseDdoti` |
|`cusparseCdoti` | |
|`cusparseZdoti` | |
|`cusparseCdotci` | |
|`cusparseZdotci` | |
|`cusparseSgthr` |`hipsparseSgthr` |
|`cusparseDgthr` |`hipsparseDgthr` |
|`cusparseCgthr` | |
|`cusparseZgthr` | |
|`cusparseSgthrz` |`hipsparseSgthrz` |
|`cusparseDgthrz` |`hipsparseDgthrz` |
|`cusparseCgthrz` | |
|`cusparseZgthrz` | |
|`cusparseSroti` |`hipsparseSroti` |
|`cusparseDroti` |`hipsparseDroti` |
|`cusparseSsctr` |`hipsparseSsctr` |
|`cusparseDsctr` |`hipsparseDsctr` |
|`cusparseCsctr` | |
|`cusparseZsctr` | |
## **4.cuSPARSE Level 2 Function Reference**
| **CUDA** | **HIP** |
|-----------------------------------------------------------|-------------------------------------------------|
|`cusparseSbsrmv` | |
|`cusparseDbsrmv` | |
|`cusparseCbsrmv` | |
|`cusparseZbsrmv` | |
|`cusparseSbsrxmv` | |
|`cusparseDbsrxmv` | |
|`cusparseCbsrxmv` | |
|`cusparseZbsrxmv` | |
|`cusparseScsrmv` |`hipsparseScsrmv` |
|`cusparseDcsrmv` |`hipsparseDcsrmv` |
|`cusparseCcsrmv` | |
|`cusparseZcsrmv` | |
|`cusparseCsrmvEx` | |
|`cusparseCsrmvEx_bufferSize` | |
|`cusparseScsrmv_mp` | |
|`cusparseDcsrmv_mp` | |
|`cusparseCcsrmv_mp` | |
|`cusparseZcsrmv_mp` | |
|`cusparseSgemvi` | |
|`cusparseDgemvi` | |
|`cusparseCgemvi` | |
|`cusparseZgemvi` | |
|`cusparseSgemvi_bufferSize` | |
|`cusparseDgemvi_bufferSize` | |
|`cusparseCgemvi_bufferSize` | |
|`cusparseZgemvi_bufferSize` | |
|`cusparseSbsrsv2_bufferSize` | |
|`cusparseDbsrsv2_bufferSize` | |
|`cusparseCbsrsv2_bufferSize` | |
|`cusparseZbsrsv2_bufferSize` | |
|`cusparseSbsrsv2_analysis` | |
|`cusparseDbsrsv2_analysis` | |
|`cusparseCbsrsv2_analysis` | |
|`cusparseZbsrsv2_analysis` | |
|`cusparseScsrsv_solve` | |
|`cusparseDcsrsv_solve` | |
|`cusparseCcsrsv_solve` | |
|`cusparseZcsrsv_solve` | |
|`cusparseXbsrsv2_zeroPivot` | |
|`cusparseScsrsv_analysis` | |
|`cusparseDcsrsv_analysis` | |
|`cusparseCcsrsv_analysis` | |
|`cusparseZcsrsv_analysis` | |
|`cusparseCsrsv_analysisEx` | |
|`cusparseScsrsv_solve` | |
|`cusparseDcsrsv_solve` | |
|`cusparseCcsrsv_solve` | |
|`cusparseZcsrsv_solve` | |
|`cusparseCsrsv_solveEx` | |
|`cusparseScsrsv2_bufferSize` |`hipsparseScsrsv2_bufferSize` |
|`cusparseDcsrsv2_bufferSize` |`hipsparseDcsrsv2_bufferSize` |
|`cusparseCcsrsv2_bufferSize` | |
|`cusparseZcsrsv2_bufferSize` | |
|`cusparseScsrsv2_analysis` |`hipsparseScsrsv2_analysis` |
|`cusparseDcsrsv2_analysis` |`hipsparseDcsrsv2_analysis` |
|`cusparseCcsrsv2_analysis` | |
|`cusparseZcsrsv2_analysis` | |
|`cusparseScsrsv2_solve` |`hipsparseScsrsv2_solve` |
|`cusparseDcsrsv2_solve` |`hipsparseDcsrsv2_solve` |
|`cusparseCcsrsv2_solve` | |
|`cusparseZcsrsv2_solve` | |
|`cusparseXcsrsv2_zeroPivot` |`hipsparseXcsrsv2_zeroPivot` |
|`cusparseShybmv` |`hipsparseShybmv` |
|`cusparseDhybmv` |`hipsparseDhybmv` |
|`cusparseChybmv` | |
|`cusparseZhybmv` | |
|`cusparseShybsv_analysis` | |
|`cusparseDhybsv_analysis` | |
|`cusparseChybsv_analysis` | |
|`cusparseZhybsv_analysis` | |
|`cusparseShybsv_solve` | |
|`cusparseDhybsv_solve` | |
|`cusparseChybsv_solve` | |
|`cusparseZhybsv_solve` | |
+172
Dosyayı Görüntüle
@@ -2,4 +2,176 @@
// Maps the names of CUDA SPARSE API types to the corresponding HIP types
const std::map<llvm::StringRef, hipCounter> CUDA_SPARSE_FUNCTION_MAP{
// 5. cuSPARSE Helper Function Reference
{"cusparseCreate", {"hipsparseCreate", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseCreateSolveAnalysisInfo", {"hipsparseCreateSolveAnalysisInfo", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCreateHybMat", {"hipsparseCreateHybMat", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseCreateMatDescr", {"hipsparseCreateMatDescr", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseDestroy", {"hipsparseDestroy", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseDestroySolveAnalysisInfo", {"hipsparseDestroySolveAnalysisInfo", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDestroyHybMat", {"hipsparseDestroyHybMat", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseDestroyMatDescr", {"hipsparseDestroyMatDescr", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseGetLevelInfo", {"hipsparseGetLevelInfo", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseGetMatDiagType", {"hipsparseGetMatDiagType", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseGetMatFillMode", {"hipsparseGetMatFillMode", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseGetMatIndexBase", {"hipsparseGetMatIndexBase", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseGetMatType", {"hipsparseGetMatType", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseGetPointerMode", {"hipsparseGetPointerMode", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseGetVersion", {"hipsparseGetVersion", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseSetMatDiagType", {"hipsparseSetMatDiagType", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseSetMatFillMode", {"hipsparseSetMatFillMode", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseSetMatIndexBase", {"hipsparseSetMatIndexBase", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseSetMatType", {"hipsparseSetMatType", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseSetPointerMode", {"hipsparseSetPointerMode", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseSetStream", {"hipsparseSetStream", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseGetStream", {"hipsparseGetStream", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseCreateCsrsv2Info", {"hipsparseCreateCsrsv2Info", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseDestroyCsrsv2Info", {"hipsparseDestroyCsrsv2Info", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseCreateCsrsm2Info", {"hipsparseCreateCsrsm2Info", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDestroyCsrsm2Info", {"hipsparseDestroyCsrsm2Info", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCreateCsric02Info", {"hipsparseCreateCsric02Info", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDestroyCsric02Info", {"hipsparseDestroyCsric02Info", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCreateCsrilu02Info", {"hipsparseCreateCsrilu02Info", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseDestroyCsrilu02Info", {"hipsparseDestroyCsrilu02Info", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseCreateBsrsv2Info", {"hipsparseCreateBsrsv2Info", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDestroyBsrsv2Info", {"hipsparseDestroyBsrsv2Info", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCreateBsrsm2Info", {"hipsparseCreateBsrsm2Info", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDestroyBsrsm2Info", {"hipsparseDestroyBsrsm2Info", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCreateBsric02Inf", {"hipsparseCreateBsric02Inf", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDestroyBsric02Info", {"hipsparseDestroyBsric02Info", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCreateBsrilu02Info", {"hipsparseCreateBsrilu02Info", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDestroyBsrilu02Info", {"hipsparseDestroyBsrilu02Info", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCreateCsrgemm2Info", {"hipsparseCreateCsrgemm2Info", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDestroyCsrgemm2Info", {"hipsparseDestroyCsrgemm2Info", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCreatePruneInfo", {"hipsparseCreatePruneInfo", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDestroyPruneInfo", {"hipsparseDestroyPruneInfo", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
// 6. cuSPARSE Level 1 Function Reference
{"cusparseSaxpyi", {"hipsparseSaxpyi", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseDaxpyi", {"hipsparseDaxpyi", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseCaxpyi", {"hipsparseCaxpyi", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZaxpyi", {"hipsparseZaxpyi", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseSdoti", {"hipsparseSdoti", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseDdoti", {"hipsparseDdoti", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseCdoti", {"hipsparseCdoti", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZdoti", {"hipsparseZdoti", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCdotci", {"hipsparseCdotci", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZdotci", {"hipsparseZdotci", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseSgthr", {"hipsparseSgthr", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseDgthr", {"hipsparseDgthr", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseCgthr", {"hipsparseCgthr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZgthr", {"hipsparseZgthr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseSgthrz", {"hipsparseSgthrz", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseDgthrz", {"hipsparseDgthrz", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseCgthrz", {"hipsparseCgthrz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZgthrz", {"hipsparseZgthrz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseSroti", {"hipsparseSroti", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseDroti", {"hipsparseDroti", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseSsctr", {"hipsparseSsctr", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseDsctr", {"hipsparseDsctr", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseCsctr", {"hipsparseCsctr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZsctr", {"hipsparseZsctr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
// 7. cuSPARSE Level 2 Function Reference
{"cusparseSbsrmv", {"hipsparseSbsrmv", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDbsrmv", {"hipsparseDbsrmv", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCbsrmv", {"hipsparseCbsrmv", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZbsrmv", {"hipsparseZbsrmv", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseSbsrxmv", {"hipsparseSbsrxmv", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDbsrxmv", {"hipsparseDbsrxmv", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCbsrxmv", {"hipsparseCbsrxmv", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZbsrxmv", {"hipsparseZbsrxmv", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseScsrmv", {"hipsparseScsrmv", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseDcsrmv", {"hipsparseDcsrmv", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseCcsrmv", {"hipsparseCcsrmv", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZcsrmv", {"hipsparseZcsrmv", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCsrmvEx", {"hipsparseCsrmvEx", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCsrmvEx_bufferSize", {"hipsparseCsrmvEx_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseScsrmv_mp", {"hipsparseScsrmv_mp", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDcsrmv_mp", {"hipsparseDcsrmv_mp", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCcsrmv_mp", {"hipsparseCcsrmv_mp", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZcsrmv_mp", {"hipsparseZcsrmv_mp", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseSgemvi", {"hipsparseSgemvi", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDgemvi", {"hipsparseDgemvi", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCgemvi", {"hipsparseCgemvi", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZgemvi", {"hipsparseZgemvi", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseSgemvi_bufferSize", {"hipsparseSgemvi_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDgemvi_bufferSize", {"hipsparseDgemvi_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCgemvi_bufferSize", {"hipsparseCgemvi_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZgemvi_bufferSize", {"hipsparseZgemvi_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseSbsrsv2_bufferSize", {"hipsparseSbsrsv2_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDbsrsv2_bufferSize", {"hipsparseDbsrsv2_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCbsrsv2_bufferSize", {"hipsparseCbsrsv2_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZbsrsv2_bufferSize", {"hipsparseZbsrsv2_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseSbsrsv2_analysis", {"hipsparseSbsrsv2_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDbsrsv2_analysis", {"hipsparseDbsrsv2_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCbsrsv2_analysis", {"hipsparseCbsrsv2_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZbsrsv2_analysis", {"hipsparseZbsrsv2_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseScsrsv_solve", {"hipsparseScsrsv_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDcsrsv_solve", {"hipsparseDcsrsv_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCcsrsv_solve", {"hipsparseCcsrsv_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZcsrsv_solve", {"hipsparseZcsrsv_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseXbsrsv2_zeroPivot", {"hipsparseXbsrsv2_zeroPivot", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseScsrsv_analysis", {"hipsparseScsrsv_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDcsrsv_analysis", {"hipsparseDcsrsv_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCcsrsv_analysis", {"hipsparseCcsrsv_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZcsrsv_analysis", {"hipsparseZcsrsv_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCsrsv_analysisEx", {"hipsparseCsrsv_analysisEx", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseScsrsv_solve", {"hipsparseScsrsv_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDcsrsv_solve", {"hipsparseDcsrsv_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCcsrsv_solve", {"hipsparseCcsrsv_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZcsrsv_solve", {"hipsparseZcsrsv_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseCsrsv_solveEx", {"hipsparseCsrsv_solveEx", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseScsrsv2_bufferSize", {"hipsparseScsrsv2_bufferSize", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseDcsrsv2_bufferSize", {"hipsparseDcsrsv2_bufferSize", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseCcsrsv2_bufferSize", {"hipsparseCcsrsv2_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZcsrsv2_bufferSize", {"hipsparseZcsrsv2_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseScsrsv2_analysis", {"hipsparseScsrsv2_analysis", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseDcsrsv2_analysis", {"hipsparseDcsrsv2_analysis", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseCcsrsv2_analysis", {"hipsparseCcsrsv2_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZcsrsv2_analysis", {"hipsparseZcsrsv2_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseScsrsv2_solve", {"hipsparseScsrsv2_solve", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseDcsrsv2_solve", {"hipsparseDcsrsv2_solve", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseCcsrsv2_solve", {"hipsparseCcsrsv2_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZcsrsv2_solve", {"hipsparseZcsrsv2_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseXcsrsv2_zeroPivot", {"hipsparseXcsrsv2_zeroPivot", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseShybmv", {"hipsparseShybmv", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseDhybmv", {"hipsparseDhybmv", CONV_LIB_FUNC, API_SPARSE}},
{"cusparseChybmv", {"hipsparseChybmv", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZhybmv", {"hipsparseZhybmv", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseShybsv_analysis", {"hipsparseShybsv_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDhybsv_analysis", {"hipsparseDhybsv_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseChybsv_analysis", {"hipsparseChybsv_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZhybsv_analysis", {"hipsparseZhybsv_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseShybsv_solve", {"hipsparseShybsv_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseDhybsv_solve", {"hipsparseDhybsv_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseChybsv_solve", {"hipsparseChybsv_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
{"cusparseZhybsv_solve", {"hipsparseZhybsv_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
};
+1 -1
Dosyayı Görüntüle
@@ -97,7 +97,7 @@ const std::map<llvm::StringRef, hipCounter> CUDA_SPARSE_TYPE_NAME_MAP{
{"CUSPARSE_SOLVE_POLICY_NO_LEVEL", {"HIPSPARSE_SOLVE_POLICY_NO_LEVEL", CONV_NUMERIC_LITERAL, API_SPARSE}},
{"CUSPARSE_SOLVE_POLICY_USE_LEVEL", {"HIPSPARSE_SOLVE_POLICY_USE_LEVEL", CONV_NUMERIC_LITERAL, API_SPARSE}},
{"cusparseStatus_t", {"hipsparseMatrixType_t", CONV_TYPE, API_SPARSE}},
{"cusparseStatus_t", {"hipsparseStatus_t", CONV_TYPE, API_SPARSE}},
{"CUSPARSE_STATUS_SUCCESS", {"HIPSPARSE_STATUS_SUCCESS", CONV_NUMERIC_LITERAL, API_SPARSE}},
{"CUSPARSE_STATUS_NOT_INITIALIZED", {"HIPSPARSE_STATUS_NOT_INITIALIZED", CONV_NUMERIC_LITERAL, API_SPARSE}},
{"CUSPARSE_STATUS_ALLOC_FAILED", {"HIPSPARSE_STATUS_ALLOC_FAILED", CONV_NUMERIC_LITERAL, API_SPARSE}},
+367
Dosyayı Görüntüle
@@ -0,0 +1,367 @@
// RUN: %run_test hipify "%s" "%t" %cuda_args
#include <stdio.h>
#include <stdlib.h>
// CHECK: #include <hip/hip_runtime.h>
#include <cuda_runtime.h>
// CHECK: #include "hipsparse.h"
#include "cusparse.h"
// CHECK: if (y) hipFree(y);
// CHECK: if (z) hipFree(z);
// CHECK: if (xInd) hipFree(xInd);
// CHECK: if (xVal) hipFree(xVal);
// CHECK: if (csrRowPtr) hipFree(csrRowPtr);
// CHECK: if (cooRowIndex) hipFree(cooRowIndex);
// CHECK: if (cooColIndex) hipFree(cooColIndex);
// CHECK: if (cooVal) hipFree(cooVal);
// CHECK: if (descr) hipsparseDestroyMatDescr(descr);
// CHECK: if (handle) hipsparseDestroy(handle);
// CHECK: hipDeviceReset();
#define CLEANUP(s) \
do { \
printf ("%s\n", s); \
if (yHostPtr) free(yHostPtr); \
if (zHostPtr) free(zHostPtr); \
if (xIndHostPtr) free(xIndHostPtr); \
if (xValHostPtr) free(xValHostPtr); \
if (cooRowIndexHostPtr) free(cooRowIndexHostPtr);\
if (cooColIndexHostPtr) free(cooColIndexHostPtr);\
if (cooValHostPtr) free(cooValHostPtr); \
if (y) cudaFree(y); \
if (z) cudaFree(z); \
if (xInd) cudaFree(xInd); \
if (xVal) cudaFree(xVal); \
if (csrRowPtr) cudaFree(csrRowPtr); \
if (cooRowIndex) cudaFree(cooRowIndex); \
if (cooColIndex) cudaFree(cooColIndex); \
if (cooVal) cudaFree(cooVal); \
if (descr) cusparseDestroyMatDescr(descr);\
if (handle) cusparseDestroy(handle); \
cudaDeviceReset(); \
fflush (stdout); \
} while (0)
int main(){
// CHECK: hipError_t cudaStat1,cudaStat2,cudaStat3,cudaStat4,cudaStat5,cudaStat6;
cudaError_t cudaStat1,cudaStat2,cudaStat3,cudaStat4,cudaStat5,cudaStat6;
// CHECK: hipsparseStatus_t status;
cusparseStatus_t status;
// CHECK: hipsparseHandle_t handle=0;
cusparseHandle_t handle=0;
// CHECK: hipsparseMatDescr_t descr=0;
cusparseMatDescr_t descr=0;
int * cooRowIndexHostPtr=0;
int * cooColIndexHostPtr=0;
double * cooValHostPtr=0;
int * cooRowIndex=0;
int * cooColIndex=0;
double * cooVal=0;
int * xIndHostPtr=0;
double * xValHostPtr=0;
double * yHostPtr=0;
int * xInd=0;
double * xVal=0;
double * y=0;
int * csrRowPtr=0;
double * zHostPtr=0;
double * z=0;
int n, nnz, nnz_vector;
double dzero =0.0;
double dtwo =2.0;
double dthree=3.0;
double dfive =5.0;
printf("testing example\n");
/* create the following sparse test matrix in COO format */
/* |1.0 2.0 3.0|
| 4.0 |
|5.0 6.0 7.0|
| 8.0 9.0| */
n=4; nnz=9;
cooRowIndexHostPtr = (int *) malloc(nnz*sizeof(cooRowIndexHostPtr[0]));
cooColIndexHostPtr = (int *) malloc(nnz*sizeof(cooColIndexHostPtr[0]));
cooValHostPtr = (double *)malloc(nnz*sizeof(cooValHostPtr[0]));
if ((!cooRowIndexHostPtr) || (!cooColIndexHostPtr) || (!cooValHostPtr)){
CLEANUP("Host malloc failed (matrix)");
return 1;
}
cooRowIndexHostPtr[0]=0; cooColIndexHostPtr[0]=0; cooValHostPtr[0]=1.0;
cooRowIndexHostPtr[1]=0; cooColIndexHostPtr[1]=2; cooValHostPtr[1]=2.0;
cooRowIndexHostPtr[2]=0; cooColIndexHostPtr[2]=3; cooValHostPtr[2]=3.0;
cooRowIndexHostPtr[3]=1; cooColIndexHostPtr[3]=1; cooValHostPtr[3]=4.0;
cooRowIndexHostPtr[4]=2; cooColIndexHostPtr[4]=0; cooValHostPtr[4]=5.0;
cooRowIndexHostPtr[5]=2; cooColIndexHostPtr[5]=2; cooValHostPtr[5]=6.0;
cooRowIndexHostPtr[6]=2; cooColIndexHostPtr[6]=3; cooValHostPtr[6]=7.0;
cooRowIndexHostPtr[7]=3; cooColIndexHostPtr[7]=1; cooValHostPtr[7]=8.0;
cooRowIndexHostPtr[8]=3; cooColIndexHostPtr[8]=3; cooValHostPtr[8]=9.0;
nnz_vector = 3;
xIndHostPtr = (int *) malloc(nnz_vector*sizeof(xIndHostPtr[0]));
xValHostPtr = (double *)malloc(nnz_vector*sizeof(xValHostPtr[0]));
yHostPtr = (double *)malloc(2*n *sizeof(yHostPtr[0]));
zHostPtr = (double *)malloc(2*(n+1) *sizeof(zHostPtr[0]));
if((!xIndHostPtr) || (!xValHostPtr) || (!yHostPtr) || (!zHostPtr)) {
CLEANUP("Host malloc failed (vectors)");
return 1;
}
yHostPtr[0] = 10.0;
xIndHostPtr[0]=0;
xValHostPtr[0]=100.0;
yHostPtr[1] = 20.0;
xIndHostPtr[1]=1;
xValHostPtr[1]=200.0;
yHostPtr[2] = 30.0;
yHostPtr[3] = 40.0;
xIndHostPtr[2]=3;
xValHostPtr[2]=400.0;
yHostPtr[4] = 50.0;
yHostPtr[5] = 60.0;
yHostPtr[6] = 70.0;
yHostPtr[7] = 80.0;
/* allocate GPU memory and copy the matrix and vectors into it */
// CHECK: cudaStat1 = hipMalloc((void**)&cooRowIndex,nnz*sizeof(cooRowIndex[0]));
cudaStat1 = cudaMalloc((void**)&cooRowIndex,nnz*sizeof(cooRowIndex[0]));
// CHECK: cudaStat2 = hipMalloc((void**)&cooColIndex,nnz*sizeof(cooColIndex[0]));
cudaStat2 = cudaMalloc((void**)&cooColIndex,nnz*sizeof(cooColIndex[0]));
// CHECK: cudaStat3 = hipMalloc((void**)&cooVal, nnz*sizeof(cooVal[0]));
cudaStat3 = cudaMalloc((void**)&cooVal, nnz*sizeof(cooVal[0]));
// CHECK: cudaStat4 = hipMalloc((void**)&y, 2*n*sizeof(y[0]));
cudaStat4 = cudaMalloc((void**)&y, 2*n*sizeof(y[0]));
// CHECK: cudaStat5 = hipMalloc((void**)&xInd,nnz_vector*sizeof(xInd[0]));
cudaStat5 = cudaMalloc((void**)&xInd,nnz_vector*sizeof(xInd[0]));
// CHECK: cudaStat6 = hipMalloc((void**)&xVal,nnz_vector*sizeof(xVal[0]));
cudaStat6 = cudaMalloc((void**)&xVal,nnz_vector*sizeof(xVal[0]));
// CHECK: if ((cudaStat1 != hipSuccess) ||
// CHECK: (cudaStat2 != hipSuccess) ||
// CHECK: (cudaStat3 != hipSuccess) ||
// CHECK: (cudaStat4 != hipSuccess) ||
// CHECK: (cudaStat5 != hipSuccess) ||
// CHECK: (cudaStat6 != hipSuccess)) {
if ((cudaStat1 != cudaSuccess) ||
(cudaStat2 != cudaSuccess) ||
(cudaStat3 != cudaSuccess) ||
(cudaStat4 != cudaSuccess) ||
(cudaStat5 != cudaSuccess) ||
(cudaStat6 != cudaSuccess)) {
CLEANUP("Device malloc failed");
return 1;
}
// CHECK: cudaStat1 = hipMemcpy(cooRowIndex, cooRowIndexHostPtr,
// CHECK: hipMemcpyHostToDevice);
cudaStat1 = cudaMemcpy(cooRowIndex, cooRowIndexHostPtr,
(size_t)(nnz*sizeof(cooRowIndex[0])),
cudaMemcpyHostToDevice);
// CHECK: cudaStat2 = hipMemcpy(cooColIndex, cooColIndexHostPtr,
// CHECK: hipMemcpyHostToDevice);
cudaStat2 = cudaMemcpy(cooColIndex, cooColIndexHostPtr,
(size_t)(nnz*sizeof(cooColIndex[0])),
cudaMemcpyHostToDevice);
// CHECK: cudaStat3 = hipMemcpy(cooVal, cooValHostPtr,
// CHECK: hipMemcpyHostToDevice);
cudaStat3 = cudaMemcpy(cooVal, cooValHostPtr,
(size_t)(nnz*sizeof(cooVal[0])),
cudaMemcpyHostToDevice);
// CHECK: cudaStat4 = hipMemcpy(y, yHostPtr,
// CHECK: hipMemcpyHostToDevice);
cudaStat4 = cudaMemcpy(y, yHostPtr,
(size_t)(2*n*sizeof(y[0])),
cudaMemcpyHostToDevice);
// CHECK: cudaStat5 = hipMemcpy(xInd, xIndHostPtr,
// CHECK: hipMemcpyHostToDevice);
cudaStat5 = cudaMemcpy(xInd, xIndHostPtr,
(size_t)(nnz_vector*sizeof(xInd[0])),
cudaMemcpyHostToDevice);
// CHECK: cudaStat6 = hipMemcpy(xVal, xValHostPtr,
// CHECK: hipMemcpyHostToDevice);
cudaStat6 = cudaMemcpy(xVal, xValHostPtr,
(size_t)(nnz_vector*sizeof(xVal[0])),
cudaMemcpyHostToDevice);
// CHECK: if ((cudaStat1 != hipSuccess) ||
// CHECK: (cudaStat2 != hipSuccess) ||
// CHECK: (cudaStat3 != hipSuccess) ||
// CHECK: (cudaStat4 != hipSuccess) ||
// CHECK: (cudaStat5 != hipSuccess) ||
// CHECK: (cudaStat6 != hipSuccess)) {
if ((cudaStat1 != cudaSuccess) ||
(cudaStat2 != cudaSuccess) ||
(cudaStat3 != cudaSuccess) ||
(cudaStat4 != cudaSuccess) ||
(cudaStat5 != cudaSuccess) ||
(cudaStat6 != cudaSuccess)) {
CLEANUP("Memcpy from Host to Device failed");
return 1;
}
/* initialize cusparse library */
// CHECK: status= hipsparseCreate(&handle);
status= cusparseCreate(&handle);
// CHECK: if (status != HIPSPARSE_STATUS_SUCCESS) {
if (status != CUSPARSE_STATUS_SUCCESS) {
CLEANUP("CUSPARSE Library initialization failed");
return 1;
}
/* create and setup matrix descriptor */
// CHECK: status= hipsparseCreateMatDescr(&descr);
status= cusparseCreateMatDescr(&descr);
// CHECK: if (status != HIPSPARSE_STATUS_SUCCESS) {
if (status != CUSPARSE_STATUS_SUCCESS) {
CLEANUP("Matrix descriptor initialization failed");
return 1;
}
// CHECK: hipsparseSetMatType(descr,HIPSPARSE_MATRIX_TYPE_GENERAL);
cusparseSetMatType(descr,CUSPARSE_MATRIX_TYPE_GENERAL);
// CHECK: hipsparseSetMatIndexBase(descr,HIPSPARSE_INDEX_BASE_ZERO);
cusparseSetMatIndexBase(descr,CUSPARSE_INDEX_BASE_ZERO);
/* exercise conversion routines (convert matrix from COO 2 CSR format) */
// CHECK: cudaStat1 = hipMalloc((void**)&csrRowPtr,(n+1)*sizeof(csrRowPtr[0]));
cudaStat1 = cudaMalloc((void**)&csrRowPtr,(n+1)*sizeof(csrRowPtr[0]));
// CHECK: if (cudaStat1 != hipSuccess) {
if (cudaStat1 != cudaSuccess) {
CLEANUP("Device malloc failed (csrRowPtr)");
return 1;
}
status= cusparseXcoo2csr(handle,cooRowIndex,nnz,n,
// CHECK: csrRowPtr,HIPSPARSE_INDEX_BASE_ZERO);
csrRowPtr,CUSPARSE_INDEX_BASE_ZERO);
// CHECK: if (status != HIPSPARSE_STATUS_SUCCESS) {
if (status != CUSPARSE_STATUS_SUCCESS) {
CLEANUP("Conversion from COO to CSR format failed");
return 1;
}
//csrRowPtr = [0 3 4 7 9]
// The following test only works for compute capability 1.3 and above
// because it needs double precision.
int devId;
// CHECK: hipDeviceProp_t prop;
cudaDeviceProp prop;
// CHECK: hipError_t cudaStat;
cudaError_t cudaStat;
// CHECK: cudaStat = hipGetDevice(&devId);
cudaStat = cudaGetDevice(&devId);
// CHECK: if (hipSuccess != cudaStat){
if (cudaSuccess != cudaStat){
// CLEANUP("hipGetDevice failed");
CLEANUP("cudaGetDevice failed");
// printf("Error: cudaStat %d, %s\n", cudaStat, hipGetErrorString(cudaStat));
printf("Error: cudaStat %d, %s\n", cudaStat, cudaGetErrorString(cudaStat));
return 1;
}
// CHECK: cudaStat = hipGetDeviceProperties( &prop, devId);
cudaStat = cudaGetDeviceProperties( &prop, devId);
// CHECK: if (hipSuccess != cudaStat) {
if (cudaSuccess != cudaStat) {
// CHECK: CLEANUP("hipGetDeviceProperties failed");
CLEANUP("cudaGetDeviceProperties failed");
// CHECK: printf("Error: cudaStat %d, %s\n", cudaStat, hipGetErrorString(cudaStat));
printf("Error: cudaStat %d, %s\n", cudaStat, cudaGetErrorString(cudaStat));
return 1;
}
int cc = 100*prop.major + 10*prop.minor;
if (cc < 130){
CLEANUP("waive the test because only sm13 and above are supported\n");
printf("the device has compute capability %d\n", cc);
printf("example test WAIVED");
return 2;
}
/* exercise Level 1 routines (scatter vector elements) */
// TODO: status= hipsparseDsctr(handle, nnz_vector, xVal, xInd,
// CHECK: &y[n], HIPSPARSE_INDEX_BASE_ZERO);
status= cusparseDsctr(handle, nnz_vector, xVal, xInd,
&y[n], CUSPARSE_INDEX_BASE_ZERO);
// CHECK: if (status != HIPSPARSE_STATUS_SUCCESS) {
if (status != CUSPARSE_STATUS_SUCCESS) {
CLEANUP("Scatter from sparse to dense vector failed");
return 1;
}
//y = [10 20 30 40 | 100 200 70 400]
/* exercise Level 2 routines (csrmv) */
// CHECK: status= hipsparseDcsrmv(handle,HIPSPARSE_OPERATION_NON_TRANSPOSE, n, n, nnz,
status= cusparseDcsrmv(handle,CUSPARSE_OPERATION_NON_TRANSPOSE, n, n, nnz,
&dtwo, descr, cooVal, csrRowPtr, cooColIndex,
&y[0], &dthree, &y[n]);
// CHECK: if (status != HIPSPARSE_STATUS_SUCCESS) {
if (status != CUSPARSE_STATUS_SUCCESS) {
CLEANUP("Matrix-vector multiplication failed");
return 1;
}
//y = [10 20 30 40 | 680 760 1230 2240]
// CHECK: hipMemcpy(yHostPtr, y, (size_t)(2*n*sizeof(y[0])), hipMemcpyDeviceToHost);
cudaMemcpy(yHostPtr, y, (size_t)(2*n*sizeof(y[0])), cudaMemcpyDeviceToHost);
/* exercise Level 3 routines (csrmm) */
// cudaStat1 = hipMalloc((void**)&z, 2*(n+1)*sizeof(z[0]));
cudaStat1 = cudaMalloc((void**)&z, 2*(n+1)*sizeof(z[0]));
// CHECK: if (cudaStat1 != hipSuccess) {
if (cudaStat1 != cudaSuccess) {
CLEANUP("Device malloc failed (z)");
return 1;
}
// CHECK: cudaStat1 = hipMemset((void *)z,0, 2*(n+1)*sizeof(z[0]));
cudaStat1 = cudaMemset((void *)z,0, 2*(n+1)*sizeof(z[0]));
// CHECK: if (cudaStat1 != hipSuccess) {
if (cudaStat1 != cudaSuccess) {
CLEANUP("Memset on Device failed");
return 1;
}
// TODO: status= hipsparseDcsrmm(handle, HIPSPARSE_OPERATION_NON_TRANSPOSE, n, 2, n,
status= cusparseDcsrmm(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, n, 2, n,
nnz, &dfive, descr, cooVal, csrRowPtr, cooColIndex,
y, n, &dzero, z, n+1);
// CHECK: if (status != HIPSPARSE_STATUS_SUCCESS) {
if (status != CUSPARSE_STATUS_SUCCESS) {
CLEANUP("Matrix-matrix multiplication failed");
return 1;
}
/* print final results (z) */
// CHECK: cudaStat1 = hipMemcpy(zHostPtr, z,
// CHECK: hipMemcpyDeviceToHost);
cudaStat1 = cudaMemcpy(zHostPtr, z,
(size_t)(2*(n+1)*sizeof(z[0])),
cudaMemcpyDeviceToHost);
// CHECK: if (cudaStat1 != hipSuccess) {
if (cudaStat1 != cudaSuccess) {
CLEANUP("Memcpy from Device to Host failed");
return 1;
}
//z = [950 400 2550 2600 0 | 49300 15200 132300 131200 0]
/* destroy matrix descriptor */
// status = hipsparseDestroyMatDescr(descr);
status = cusparseDestroyMatDescr(descr);
descr = 0;
// CHECK: if (status != HIPSPARSE_STATUS_SUCCESS) {
if (status != CUSPARSE_STATUS_SUCCESS) {
CLEANUP("Matrix descriptor destruction failed");
return 1;
}
/* destroy handle */
// CHECK: status = hipsparseDestroy(handle);
status = cusparseDestroy(handle);
handle = 0;
// CHECK: if (status != HIPSPARSE_STATUS_SUCCESS) {
if (status != CUSPARSE_STATUS_SUCCESS) {
CLEANUP("CUSPARSE Library release of resources failed");
return 1;
}
/* check the results */
// Notice that CLEANUP() contains a call to cusparseDestroy(handle)
if ((zHostPtr[0] != 950.0) ||
(zHostPtr[1] != 400.0) ||
(zHostPtr[2] != 2550.0) ||
(zHostPtr[3] != 2600.0) ||
(zHostPtr[4] != 0.0) ||
(zHostPtr[5] != 49300.0) ||
(zHostPtr[6] != 15200.0) ||
(zHostPtr[7] != 132300.0) ||
(zHostPtr[8] != 131200.0) ||
(zHostPtr[9] != 0.0) ||
(yHostPtr[0] != 10.0) ||
(yHostPtr[1] != 20.0) ||
(yHostPtr[2] != 30.0) ||
(yHostPtr[3] != 40.0) ||
(yHostPtr[4] != 680.0) ||
(yHostPtr[5] != 760.0) ||
(yHostPtr[6] != 1230.0) ||
(yHostPtr[7] != 2240.0)) {
CLEANUP("example test FAILED");
return 1;
} else {
CLEANUP("example test PASSED");
return 0;
}
}
+284
Dosyayı Görüntüle
@@ -0,0 +1,284 @@
// RUN: %run_test hipify "%s" "%t" %cuda_args
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
// CHECK: #include <hip/hip_runtime.h>
#include <cuda_runtime.h>
// CHECK: #include <hipblas.h>
#include <cublas_v2.h>
// CHECK: #include "hipsparse.h"
#include "cusparse.h"
void printMatrix(int m, int n, const double*A, int lda, const char* name)
{
for(int row = 0 ; row < m ; row++){
for(int col = 0 ; col < n ; col++){
double Areg = A[row + col*lda];
printf("%s(%d,%d) = %f\n", name, row+1, col+1, Areg);
}
}
}
int main(int argc, char*argv[])
{
// CHECK: hipblasHandle_t cublasH = NULL;
cublasHandle_t cublasH = NULL;
// CHECK: hipsparseHandle_t cusparseH = NULL;
cusparseHandle_t cusparseH = NULL;
// CHECK: hipStream_t stream = NULL;
cudaStream_t stream = NULL;
// CHECK: hipsparseMatDescr_t descrA = NULL;
cusparseMatDescr_t descrA = NULL;
// CHECK: hipblasStatus_t cublasStat = HIPBLAS_STATUS_SUCCESS;
cublasStatus_t cublasStat = CUBLAS_STATUS_SUCCESS;
// CHECK: hipsparseStatus_t cusparseStat = HIPSPARSE_STATUS_SUCCESS;
cusparseStatus_t cusparseStat = CUSPARSE_STATUS_SUCCESS;
// CHECK: hipError_t cudaStat1 = hipSuccess;
// CHECK: hipError_t cudaStat2 = hipSuccess;
// CHECK: hipError_t cudaStat3 = hipSuccess;
// CHECK: hipError_t cudaStat4 = hipSuccess;
// CHECK: hipError_t cudaStat5 = hipSuccess;
cudaError_t cudaStat1 = cudaSuccess;
cudaError_t cudaStat2 = cudaSuccess;
cudaError_t cudaStat3 = cudaSuccess;
cudaError_t cudaStat4 = cudaSuccess;
cudaError_t cudaStat5 = cudaSuccess;
const int n = 4;
const int nnzA = 9;
/*
* | 1 0 2 3 |
* | 0 4 0 0 |
* A = | 5 0 6 7 |
* | 0 8 0 9 |
*
* eigevales are { -0.5311, 7.5311, 9.0000, 4.0000 }
*
* The largest eigenvaluse is 9 and corresponding eigenvector is
*
* | 0.3029 |
* v = | 0 |
* | 0.9350 |
* | 0.1844 |
*/
const int csrRowPtrA[n+1] = { 0, 3, 4, 7, 9 };
const int csrColIndA[nnzA] = {0, 2, 3, 1, 0, 2, 3, 1, 3 };
const double csrValA[nnzA] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0 };
const double lambda_exact[n] = { 9.0000, 7.5311, 4.0000, -0.5311 };
const double x0[n] = {1.0, 2.0, 3.0, 4.0 }; /* initial guess */
double x[n]; /* numerical eigenvector */
int *d_csrRowPtrA = NULL;
int *d_csrColIndA = NULL;
double *d_csrValA = NULL;
double *d_x = NULL; /* eigenvector */
double *d_y = NULL; /* workspace */
const double tol = 1.e-6;
const int max_ites = 30;
const double h_one = 1.0;
const double h_zero = 0.0;
printf("example of csrmv_mp \n");
printf("tol = %E \n", tol);
printf("max. iterations = %d \n", max_ites);
printf("1st eigenvaluse is %f\n", lambda_exact[0] );
printf("2nd eigenvaluse is %f\n", lambda_exact[1] );
double alpha = lambda_exact[1]/lambda_exact[0] ;
printf("convergence rate is %f\n", alpha );
double est_iterations = log(tol)/log(alpha);
printf("# of iterations required is %d\n", (int)ceil(est_iterations));
// step 1: create cublas/cusparse handle, bind a stream
// CHECK: cudaStat1 = hipStreamCreateWithFlags(&stream, hipStreamNonBlocking);
cudaStat1 = cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking);
// CHECK: assert(hipSuccess == cudaStat1);
assert(cudaSuccess == cudaStat1);
// CHECK: cublasStat = hipblasCreate(&cublasH);
cublasStat = cublasCreate(&cublasH);
// CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat);
assert(CUBLAS_STATUS_SUCCESS == cublasStat);
// CHECK: cublasStat = hipblasSetStream(cublasH, stream);
cublasStat = cublasSetStream(cublasH, stream);
// CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat);
assert(CUBLAS_STATUS_SUCCESS == cublasStat);
// CHECK: cusparseStat = hipsparseCreate(&cusparseH);
cusparseStat = cusparseCreate(&cusparseH);
// CHECK: assert(HIPSPARSE_STATUS_SUCCESS == cusparseStat);
assert(CUSPARSE_STATUS_SUCCESS == cusparseStat);
// CHECK: cusparseStat = hipsparseSetStream(cusparseH, stream);
cusparseStat = cusparseSetStream(cusparseH, stream);
// CHECK: assert(HIPSPARSE_STATUS_SUCCESS == cusparseStat);
assert(CUSPARSE_STATUS_SUCCESS == cusparseStat);
// step 2: configuration of matrix A
// cusparseStat = hipsparseCreateMatDescr(&descrA);
cusparseStat = cusparseCreateMatDescr(&descrA);
// assert(HIPSPARSE_STATUS_SUCCESS == cusparseStat);
assert(CUSPARSE_STATUS_SUCCESS == cusparseStat);
// CHECK: hipsparseSetMatIndexBase(descrA,HIPSPARSE_INDEX_BASE_ZERO);
cusparseSetMatIndexBase(descrA,CUSPARSE_INDEX_BASE_ZERO);
// CHECK: hipsparseSetMatType(descrA, HIPSPARSE_MATRIX_TYPE_GENERAL );
cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL );
// step 3: copy A and x0 to device
// CHECK: cudaStat1 = hipMalloc ((void**)&d_csrRowPtrA, sizeof(int) * (n+1) );
cudaStat1 = cudaMalloc ((void**)&d_csrRowPtrA, sizeof(int) * (n+1) );
// CHECK: cudaStat2 = hipMalloc ((void**)&d_csrColIndA, sizeof(int) * nnzA );
cudaStat2 = cudaMalloc ((void**)&d_csrColIndA, sizeof(int) * nnzA );
// CHECK: cudaStat3 = hipMalloc ((void**)&d_csrValA , sizeof(double) * nnzA );
cudaStat3 = cudaMalloc ((void**)&d_csrValA , sizeof(double) * nnzA );
// CHECK: cudaStat4 = hipMalloc ((void**)&d_x , sizeof(double) * n );
cudaStat4 = cudaMalloc ((void**)&d_x , sizeof(double) * n );
// CHECK: cudaStat5 = hipMalloc ((void**)&d_y , sizeof(double) * n );
cudaStat5 = cudaMalloc ((void**)&d_y , sizeof(double) * n );
// CHECK: assert(hipSuccess == cudaStat1);
// CHECK: assert(hipSuccess == cudaStat2);
// CHECK: assert(hipSuccess == cudaStat3);
// CHECK: assert(hipSuccess == cudaStat4);
// CHECK: assert(hipSuccess == cudaStat5);
assert(cudaSuccess == cudaStat1);
assert(cudaSuccess == cudaStat2);
assert(cudaSuccess == cudaStat3);
assert(cudaSuccess == cudaStat4);
assert(cudaSuccess == cudaStat5);
// CHECK: cudaStat1 = hipMemcpy(d_csrRowPtrA, csrRowPtrA, sizeof(int) * (n+1) , hipMemcpyHostToDevice);
cudaStat1 = cudaMemcpy(d_csrRowPtrA, csrRowPtrA, sizeof(int) * (n+1) , cudaMemcpyHostToDevice);
// CHECK: cudaStat2 = hipMemcpy(d_csrColIndA, csrColIndA, sizeof(int) * nnzA , hipMemcpyHostToDevice);
cudaStat2 = cudaMemcpy(d_csrColIndA, csrColIndA, sizeof(int) * nnzA , cudaMemcpyHostToDevice);
// CHECK: cudaStat3 = hipMemcpy(d_csrValA , csrValA , sizeof(double) * nnzA , hipMemcpyHostToDevice);
cudaStat3 = cudaMemcpy(d_csrValA , csrValA , sizeof(double) * nnzA , cudaMemcpyHostToDevice);
// CHECK: assert(hipSuccess == cudaStat1);
assert(cudaSuccess == cudaStat1);
// CHECK: assert(hipSuccess == cudaStat2);
assert(cudaSuccess == cudaStat2);
// CHECK: assert(hipSuccess == cudaStat3);
assert(cudaSuccess == cudaStat3);
// step 4: power method
double lambda = 0.0;
double lambda_next = 0.0;
// 4.1: initial guess x0
cudaStat1 = cudaMemcpy(d_x, x0, sizeof(double) * n, cudaMemcpyHostToDevice);
// CHECK: assert(hipSuccess == cudaStat1);
assert(cudaSuccess == cudaStat1);
for(int ite = 0 ; ite < max_ites ; ite++ ){
// 4.2: normalize vector x
// x = x / |x|
double nrm2_x;
// TODO: cublasStat = hipblasDnrm2_v2(cublasH,
cublasStat = cublasDnrm2_v2(cublasH,
n,
d_x,
1, // incx,
&nrm2_x /* host pointer */
);
// CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat);
assert(CUBLAS_STATUS_SUCCESS == cublasStat);
double one_over_nrm2_x = 1.0 / nrm2_x;
// TODO: cublasStat = hipblasDscal_v2( cublasH,
cublasStat = cublasDscal_v2( cublasH,
n,
&one_over_nrm2_x, /* host pointer */
d_x,
1 // incx
);
// CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat);
assert(CUBLAS_STATUS_SUCCESS == cublasStat);
// 4.3: y = A*x
// TODO: hipsparseStat = cusparseDcsrmv_mp(cusparseH,
// CHECK: HIPSPARSE_OPERATION_NON_TRANSPOSE
cusparseStat = cusparseDcsrmv_mp(cusparseH,
CUSPARSE_OPERATION_NON_TRANSPOSE,
n,
n,
nnzA,
&h_one,
descrA,
d_csrValA,
d_csrRowPtrA,
d_csrColIndA,
d_x,
&h_zero,
d_y);
// CHECK: assert(HIPSPARSE_STATUS_SUCCESS == cusparseStat);
assert(CUSPARSE_STATUS_SUCCESS == cusparseStat);
// 4.4: lambda = y**T*x
// TODO: cublasStat = hipblasDdot_v2 ( cublasH,
cublasStat = cublasDdot_v2 ( cublasH,
n,
d_x,
1, // incx,
d_y,
1, // incy,
&lambda_next /* host pointer */
);
// CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat);
assert(CUBLAS_STATUS_SUCCESS == cublasStat);
double lambda_err = fabs( lambda_next - lambda_exact[0] );
printf("ite %d: lambda = %f, error = %E\n", ite, lambda_next, lambda_err );
// 4.5: check if converges
if ( (ite > 0) &&
fabs( lambda - lambda_next ) < tol
){
break; // converges
}
/*
* 4.6: x := y
* lambda = lambda_next
*
* so new approximation is (lambda, x), x is not normalized.
*/
// CHECK: cudaStat1 = hipMemcpy(d_x, d_y, sizeof(double) * n , hipMemcpyDeviceToDevice);
cudaStat1 = cudaMemcpy(d_x, d_y, sizeof(double) * n , cudaMemcpyDeviceToDevice);
// CHECK: assert(hipSuccess == cudaStat1);
assert(cudaSuccess == cudaStat1);
lambda = lambda_next;
}
// step 5: report eigen-pair
// CHECK: cudaStat1 = hipMemcpy(x, d_x, sizeof(double) * n, hipMemcpyDeviceToHost);
cudaStat1 = cudaMemcpy(x, d_x, sizeof(double) * n, cudaMemcpyDeviceToHost);
// CHECK: assert(hipSuccess == cudaStat1);
assert(cudaSuccess == cudaStat1);
printf("largest eigenvalue is %E\n", lambda );
printf("eigenvector = (matlab base-1)\n");
printMatrix(n, 1, x, n, "V0");
printf("=====\n");
// free resources
// CHECK: if (d_csrRowPtrA ) hipFree(d_csrRowPtrA);
if (d_csrRowPtrA ) cudaFree(d_csrRowPtrA);
// CHECK: if (d_csrColIndA ) hipFree(d_csrColIndA);
if (d_csrColIndA ) cudaFree(d_csrColIndA);
// CHECK: if (d_csrValA ) hipFree(d_csrValA);
if (d_csrValA ) cudaFree(d_csrValA);
// CHECK: if (d_x ) hipFree(d_x);
if (d_x ) cudaFree(d_x);
// CHeCK: if (d_y ) hipFree(d_y);
if (d_y ) cudaFree(d_y);
// CHECK: if (cublasH ) hipblasDestroy(cublasH);
if (cublasH ) cublasDestroy(cublasH);
// CHECK: if (cusparseH ) hipsparseDestroy(cusparseH);
if (cusparseH ) cusparseDestroy(cusparseH);
// CHECK: if (stream ) hipStreamDestroy(stream);
if (stream ) cudaStreamDestroy(stream);
// CHECK: if (descrA ) hipsparseDestroyMatDescr(descrA);
if (descrA ) cusparseDestroyMatDescr(descrA);
// CHECK: hipDeviceReset();
cudaDeviceReset();
return 0;
}