Merge pull request #775 from emankov/master
[HIPIFY][SPARSE] Helper and Level 1,2 functions
[ROCm/hip commit: dc5aea4b03]
Bu işleme şunda yer alıyor:
@@ -83,7 +83,156 @@
|
||||
| struct |`pruneInfo` | |
|
||||
| typedef |`pruneInfo_t` | |
|
||||
|
||||
## **2. CUSPARSE API functions**
|
||||
## **2.cuSPARSE Helper Function Reference**
|
||||
|
||||
| **CUDA** | **HIP** |
|
||||
|-----------------------------------------------------------|-------------------------------------------------|
|
||||
|`cusparseCreate` |`hipsparseCreate` |
|
||||
|`cusparseCreateSolveAnalysisInfo` | |
|
||||
|`cusparseCreateHybMat` |`hipsparseCreateHybMat` |
|
||||
|`cusparseCreateMatDescr` |`hipsparseCreateMatDescr` |
|
||||
|`cusparseDestroy` |`hipsparseDestroy` |
|
||||
|`cusparseDestroySolveAnalysisInfo` | |
|
||||
|`cusparseDestroyHybMat` |`hipsparseDestroyHybMat` |
|
||||
|`cusparseDestroyMatDescr` |`hipsparseDestroyMatDescr` |
|
||||
|`cusparseGetLevelInfo` | |
|
||||
|`cusparseGetMatDiagType` |`hipsparseGetMatDiagType` |
|
||||
|`cusparseGetMatFillMode` |`hipsparseGetMatFillMode` |
|
||||
|`cusparseGetMatIndexBase` |`hipsparseGetMatIndexBase` |
|
||||
|`cusparseGetMatType` |`hipsparseGetMatType` |
|
||||
|`cusparseGetPointerMode` |`hipsparseGetPointerMode` |
|
||||
|`cusparseGetVersion` |`hipsparseGetVersion` |
|
||||
|`cusparseSetMatDiagType` |`hipsparseSetMatDiagType` |
|
||||
|`cusparseSetMatFillMode` |`hipsparseSetMatFillMode` |
|
||||
|`cusparseSetMatType` |`hipsparseSetMatType` |
|
||||
|`cusparseSetPointerMode` |`hipsparseSetPointerMode` |
|
||||
|`cusparseSetStream` |`hipsparseSetStream` |
|
||||
|`cusparseGetStream` |`hipsparseGetStream` |
|
||||
|`cusparseCreateCsrsv2Info` |`hipsparseCreateCsrsv2Info` |
|
||||
|`cusparseDestroyCsrsv2Info` |`hipsparseDestroyCsrsv2Info` |
|
||||
|`cusparseCreateCsrsm2Info` | |
|
||||
|`cusparseDestroyCsrsm2Info` | |
|
||||
|`cusparseCreateCsric02Info` | |
|
||||
|`cusparseDestroyCsric02Info` | |
|
||||
|`cusparseCreateCsrilu02Info` |`hipsparseCreateCsrilu02Info` |
|
||||
|`cusparseDestroyCsrilu02Info` |`hipsparseDestroyCsrilu02Info` |
|
||||
|`cusparseCreateBsrsv2Info` | |
|
||||
|`cusparseDestroyBsrsv2Info` | |
|
||||
|`cusparseCreateBsrsm2Info` | |
|
||||
|`cusparseDestroyBsrsm2Info` | |
|
||||
|`cusparseCreateBsric02Info` | |
|
||||
|`cusparseDestroyBsric02Info` | |
|
||||
|`cusparseCreateBsrilu02Info` | |
|
||||
|`cusparseDestroyBsrilu02Info` | |
|
||||
|`cusparseCreateCsrgemm2Info` | |
|
||||
|`cusparseDestroyCsrgemm2Info` | |
|
||||
|`cusparseCreatePruneInfo` | |
|
||||
|`cusparseDestroyPruneInfo` | |
|
||||
|
||||
## **3.cuSPARSE Level 1 Function Reference**
|
||||
|
||||
| **CUDA** | **HIP** |
|
||||
|-----------------------------------------------------------|-------------------------------------------------|
|
||||
|`cusparseSaxpyi` |`hipsparseSaxpyi` |
|
||||
|`cusparseDaxpyi` |`hipsparseDaxpyi` |
|
||||
|`cusparseCaxpyi` | |
|
||||
|`cusparseZaxpyi` | |
|
||||
|`cusparseSdoti` |`hipsparseSdoti` |
|
||||
|`cusparseDdoti` |`hipsparseDdoti` |
|
||||
|`cusparseCdoti` | |
|
||||
|`cusparseZdoti` | |
|
||||
|`cusparseCdotci` | |
|
||||
|`cusparseZdotci` | |
|
||||
|`cusparseSgthr` |`hipsparseSgthr` |
|
||||
|`cusparseDgthr` |`hipsparseDgthr` |
|
||||
|`cusparseCgthr` | |
|
||||
|`cusparseZgthr` | |
|
||||
|`cusparseSgthrz` |`hipsparseSgthrz` |
|
||||
|`cusparseDgthrz` |`hipsparseDgthrz` |
|
||||
|`cusparseCgthrz` | |
|
||||
|`cusparseZgthrz` | |
|
||||
|`cusparseSroti` |`hipsparseSroti` |
|
||||
|`cusparseDroti` |`hipsparseDroti` |
|
||||
|`cusparseSsctr` |`hipsparseSsctr` |
|
||||
|`cusparseDsctr` |`hipsparseDsctr` |
|
||||
|`cusparseCsctr` | |
|
||||
|`cusparseZsctr` | |
|
||||
|
||||
## **4.cuSPARSE Level 2 Function Reference**
|
||||
|
||||
| **CUDA** | **HIP** |
|
||||
|-----------------------------------------------------------|-------------------------------------------------|
|
||||
|`cusparseSbsrmv` | |
|
||||
|`cusparseDbsrmv` | |
|
||||
|`cusparseCbsrmv` | |
|
||||
|`cusparseZbsrmv` | |
|
||||
|`cusparseSbsrxmv` | |
|
||||
|`cusparseDbsrxmv` | |
|
||||
|`cusparseCbsrxmv` | |
|
||||
|`cusparseZbsrxmv` | |
|
||||
|`cusparseScsrmv` |`hipsparseScsrmv` |
|
||||
|`cusparseDcsrmv` |`hipsparseDcsrmv` |
|
||||
|`cusparseCcsrmv` | |
|
||||
|`cusparseZcsrmv` | |
|
||||
|`cusparseCsrmvEx` | |
|
||||
|`cusparseCsrmvEx_bufferSize` | |
|
||||
|`cusparseScsrmv_mp` | |
|
||||
|`cusparseDcsrmv_mp` | |
|
||||
|`cusparseCcsrmv_mp` | |
|
||||
|`cusparseZcsrmv_mp` | |
|
||||
|`cusparseSgemvi` | |
|
||||
|`cusparseDgemvi` | |
|
||||
|`cusparseCgemvi` | |
|
||||
|`cusparseZgemvi` | |
|
||||
|`cusparseSgemvi_bufferSize` | |
|
||||
|`cusparseDgemvi_bufferSize` | |
|
||||
|`cusparseCgemvi_bufferSize` | |
|
||||
|`cusparseZgemvi_bufferSize` | |
|
||||
|`cusparseSbsrsv2_bufferSize` | |
|
||||
|`cusparseDbsrsv2_bufferSize` | |
|
||||
|`cusparseCbsrsv2_bufferSize` | |
|
||||
|`cusparseZbsrsv2_bufferSize` | |
|
||||
|`cusparseSbsrsv2_analysis` | |
|
||||
|`cusparseDbsrsv2_analysis` | |
|
||||
|`cusparseCbsrsv2_analysis` | |
|
||||
|`cusparseZbsrsv2_analysis` | |
|
||||
|`cusparseScsrsv_solve` | |
|
||||
|`cusparseDcsrsv_solve` | |
|
||||
|`cusparseCcsrsv_solve` | |
|
||||
|`cusparseZcsrsv_solve` | |
|
||||
|`cusparseXbsrsv2_zeroPivot` | |
|
||||
|`cusparseScsrsv_analysis` | |
|
||||
|`cusparseDcsrsv_analysis` | |
|
||||
|`cusparseCcsrsv_analysis` | |
|
||||
|`cusparseZcsrsv_analysis` | |
|
||||
|`cusparseCsrsv_analysisEx` | |
|
||||
|`cusparseScsrsv_solve` | |
|
||||
|`cusparseDcsrsv_solve` | |
|
||||
|`cusparseCcsrsv_solve` | |
|
||||
|`cusparseZcsrsv_solve` | |
|
||||
|`cusparseCsrsv_solveEx` | |
|
||||
|`cusparseScsrsv2_bufferSize` |`hipsparseScsrsv2_bufferSize` |
|
||||
|`cusparseDcsrsv2_bufferSize` |`hipsparseDcsrsv2_bufferSize` |
|
||||
|`cusparseCcsrsv2_bufferSize` | |
|
||||
|`cusparseZcsrsv2_bufferSize` | |
|
||||
|`cusparseScsrsv2_analysis` |`hipsparseScsrsv2_analysis` |
|
||||
|`cusparseDcsrsv2_analysis` |`hipsparseDcsrsv2_analysis` |
|
||||
|`cusparseCcsrsv2_analysis` | |
|
||||
|`cusparseZcsrsv2_analysis` | |
|
||||
|`cusparseScsrsv2_solve` |`hipsparseScsrsv2_solve` |
|
||||
|`cusparseDcsrsv2_solve` |`hipsparseDcsrsv2_solve` |
|
||||
|`cusparseCcsrsv2_solve` | |
|
||||
|`cusparseZcsrsv2_solve` | |
|
||||
|`cusparseXcsrsv2_zeroPivot` |`hipsparseXcsrsv2_zeroPivot` |
|
||||
|`cusparseShybmv` |`hipsparseShybmv` |
|
||||
|`cusparseDhybmv` |`hipsparseDhybmv` |
|
||||
|`cusparseChybmv` | |
|
||||
|`cusparseZhybmv` | |
|
||||
|`cusparseShybsv_analysis` | |
|
||||
|`cusparseDhybsv_analysis` | |
|
||||
|`cusparseChybsv_analysis` | |
|
||||
|`cusparseZhybsv_analysis` | |
|
||||
|`cusparseShybsv_solve` | |
|
||||
|`cusparseDhybsv_solve` | |
|
||||
|`cusparseChybsv_solve` | |
|
||||
|`cusparseZhybsv_solve` | |
|
||||
|
||||
@@ -2,4 +2,176 @@
|
||||
|
||||
// Maps the names of CUDA SPARSE API types to the corresponding HIP types
|
||||
const std::map<llvm::StringRef, hipCounter> CUDA_SPARSE_FUNCTION_MAP{
|
||||
// 5. cuSPARSE Helper Function Reference
|
||||
{"cusparseCreate", {"hipsparseCreate", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseCreateSolveAnalysisInfo", {"hipsparseCreateSolveAnalysisInfo", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseCreateHybMat", {"hipsparseCreateHybMat", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseCreateMatDescr", {"hipsparseCreateMatDescr", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseDestroy", {"hipsparseDestroy", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseDestroySolveAnalysisInfo", {"hipsparseDestroySolveAnalysisInfo", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDestroyHybMat", {"hipsparseDestroyHybMat", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseDestroyMatDescr", {"hipsparseDestroyMatDescr", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseGetLevelInfo", {"hipsparseGetLevelInfo", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseGetMatDiagType", {"hipsparseGetMatDiagType", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseGetMatFillMode", {"hipsparseGetMatFillMode", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseGetMatIndexBase", {"hipsparseGetMatIndexBase", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseGetMatType", {"hipsparseGetMatType", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseGetPointerMode", {"hipsparseGetPointerMode", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseGetVersion", {"hipsparseGetVersion", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseSetMatDiagType", {"hipsparseSetMatDiagType", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseSetMatFillMode", {"hipsparseSetMatFillMode", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseSetMatIndexBase", {"hipsparseSetMatIndexBase", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseSetMatType", {"hipsparseSetMatType", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseSetPointerMode", {"hipsparseSetPointerMode", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseSetStream", {"hipsparseSetStream", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseGetStream", {"hipsparseGetStream", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseCreateCsrsv2Info", {"hipsparseCreateCsrsv2Info", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseDestroyCsrsv2Info", {"hipsparseDestroyCsrsv2Info", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseCreateCsrsm2Info", {"hipsparseCreateCsrsm2Info", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDestroyCsrsm2Info", {"hipsparseDestroyCsrsm2Info", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseCreateCsric02Info", {"hipsparseCreateCsric02Info", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDestroyCsric02Info", {"hipsparseDestroyCsric02Info", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseCreateCsrilu02Info", {"hipsparseCreateCsrilu02Info", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseDestroyCsrilu02Info", {"hipsparseDestroyCsrilu02Info", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseCreateBsrsv2Info", {"hipsparseCreateBsrsv2Info", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDestroyBsrsv2Info", {"hipsparseDestroyBsrsv2Info", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseCreateBsrsm2Info", {"hipsparseCreateBsrsm2Info", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDestroyBsrsm2Info", {"hipsparseDestroyBsrsm2Info", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseCreateBsric02Inf", {"hipsparseCreateBsric02Inf", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDestroyBsric02Info", {"hipsparseDestroyBsric02Info", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseCreateBsrilu02Info", {"hipsparseCreateBsrilu02Info", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDestroyBsrilu02Info", {"hipsparseDestroyBsrilu02Info", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseCreateCsrgemm2Info", {"hipsparseCreateCsrgemm2Info", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDestroyCsrgemm2Info", {"hipsparseDestroyCsrgemm2Info", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseCreatePruneInfo", {"hipsparseCreatePruneInfo", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDestroyPruneInfo", {"hipsparseDestroyPruneInfo", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
// 6. cuSPARSE Level 1 Function Reference
|
||||
{"cusparseSaxpyi", {"hipsparseSaxpyi", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseDaxpyi", {"hipsparseDaxpyi", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseCaxpyi", {"hipsparseCaxpyi", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZaxpyi", {"hipsparseZaxpyi", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseSdoti", {"hipsparseSdoti", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseDdoti", {"hipsparseDdoti", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseCdoti", {"hipsparseCdoti", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZdoti", {"hipsparseZdoti", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseCdotci", {"hipsparseCdotci", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZdotci", {"hipsparseZdotci", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseSgthr", {"hipsparseSgthr", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseDgthr", {"hipsparseDgthr", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseCgthr", {"hipsparseCgthr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZgthr", {"hipsparseZgthr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseSgthrz", {"hipsparseSgthrz", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseDgthrz", {"hipsparseDgthrz", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseCgthrz", {"hipsparseCgthrz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZgthrz", {"hipsparseZgthrz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseSroti", {"hipsparseSroti", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseDroti", {"hipsparseDroti", CONV_LIB_FUNC, API_SPARSE}},
|
||||
|
||||
{"cusparseSsctr", {"hipsparseSsctr", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseDsctr", {"hipsparseDsctr", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseCsctr", {"hipsparseCsctr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZsctr", {"hipsparseZsctr", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
// 7. cuSPARSE Level 2 Function Reference
|
||||
{"cusparseSbsrmv", {"hipsparseSbsrmv", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDbsrmv", {"hipsparseDbsrmv", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseCbsrmv", {"hipsparseCbsrmv", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZbsrmv", {"hipsparseZbsrmv", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseSbsrxmv", {"hipsparseSbsrxmv", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDbsrxmv", {"hipsparseDbsrxmv", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseCbsrxmv", {"hipsparseCbsrxmv", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZbsrxmv", {"hipsparseZbsrxmv", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseScsrmv", {"hipsparseScsrmv", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseDcsrmv", {"hipsparseDcsrmv", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseCcsrmv", {"hipsparseCcsrmv", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZcsrmv", {"hipsparseZcsrmv", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseCsrmvEx", {"hipsparseCsrmvEx", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseCsrmvEx_bufferSize", {"hipsparseCsrmvEx_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseScsrmv_mp", {"hipsparseScsrmv_mp", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDcsrmv_mp", {"hipsparseDcsrmv_mp", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseCcsrmv_mp", {"hipsparseCcsrmv_mp", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZcsrmv_mp", {"hipsparseZcsrmv_mp", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseSgemvi", {"hipsparseSgemvi", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDgemvi", {"hipsparseDgemvi", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseCgemvi", {"hipsparseCgemvi", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZgemvi", {"hipsparseZgemvi", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseSgemvi_bufferSize", {"hipsparseSgemvi_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDgemvi_bufferSize", {"hipsparseDgemvi_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseCgemvi_bufferSize", {"hipsparseCgemvi_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZgemvi_bufferSize", {"hipsparseZgemvi_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseSbsrsv2_bufferSize", {"hipsparseSbsrsv2_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDbsrsv2_bufferSize", {"hipsparseDbsrsv2_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseCbsrsv2_bufferSize", {"hipsparseCbsrsv2_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZbsrsv2_bufferSize", {"hipsparseZbsrsv2_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseSbsrsv2_analysis", {"hipsparseSbsrsv2_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDbsrsv2_analysis", {"hipsparseDbsrsv2_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseCbsrsv2_analysis", {"hipsparseCbsrsv2_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZbsrsv2_analysis", {"hipsparseZbsrsv2_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseScsrsv_solve", {"hipsparseScsrsv_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDcsrsv_solve", {"hipsparseDcsrsv_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseCcsrsv_solve", {"hipsparseCcsrsv_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZcsrsv_solve", {"hipsparseZcsrsv_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseXbsrsv2_zeroPivot", {"hipsparseXbsrsv2_zeroPivot", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseScsrsv_analysis", {"hipsparseScsrsv_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDcsrsv_analysis", {"hipsparseDcsrsv_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseCcsrsv_analysis", {"hipsparseCcsrsv_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZcsrsv_analysis", {"hipsparseZcsrsv_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseCsrsv_analysisEx", {"hipsparseCsrsv_analysisEx", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseScsrsv_solve", {"hipsparseScsrsv_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDcsrsv_solve", {"hipsparseDcsrsv_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseCcsrsv_solve", {"hipsparseCcsrsv_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZcsrsv_solve", {"hipsparseZcsrsv_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseCsrsv_solveEx", {"hipsparseCsrsv_solveEx", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseScsrsv2_bufferSize", {"hipsparseScsrsv2_bufferSize", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseDcsrsv2_bufferSize", {"hipsparseDcsrsv2_bufferSize", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseCcsrsv2_bufferSize", {"hipsparseCcsrsv2_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZcsrsv2_bufferSize", {"hipsparseZcsrsv2_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseScsrsv2_analysis", {"hipsparseScsrsv2_analysis", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseDcsrsv2_analysis", {"hipsparseDcsrsv2_analysis", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseCcsrsv2_analysis", {"hipsparseCcsrsv2_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZcsrsv2_analysis", {"hipsparseZcsrsv2_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseScsrsv2_solve", {"hipsparseScsrsv2_solve", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseDcsrsv2_solve", {"hipsparseDcsrsv2_solve", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseCcsrsv2_solve", {"hipsparseCcsrsv2_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZcsrsv2_solve", {"hipsparseZcsrsv2_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseXcsrsv2_zeroPivot", {"hipsparseXcsrsv2_zeroPivot", CONV_LIB_FUNC, API_SPARSE}},
|
||||
|
||||
{"cusparseShybmv", {"hipsparseShybmv", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseDhybmv", {"hipsparseDhybmv", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseChybmv", {"hipsparseChybmv", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZhybmv", {"hipsparseZhybmv", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseShybsv_analysis", {"hipsparseShybsv_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDhybsv_analysis", {"hipsparseDhybsv_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseChybsv_analysis", {"hipsparseChybsv_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZhybsv_analysis", {"hipsparseZhybsv_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseShybsv_solve", {"hipsparseShybsv_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseDhybsv_solve", {"hipsparseDhybsv_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseChybsv_solve", {"hipsparseChybsv_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseZhybsv_solve", {"hipsparseZhybsv_solve", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
};
|
||||
|
||||
@@ -97,7 +97,7 @@ const std::map<llvm::StringRef, hipCounter> CUDA_SPARSE_TYPE_NAME_MAP{
|
||||
{"CUSPARSE_SOLVE_POLICY_NO_LEVEL", {"HIPSPARSE_SOLVE_POLICY_NO_LEVEL", CONV_NUMERIC_LITERAL, API_SPARSE}},
|
||||
{"CUSPARSE_SOLVE_POLICY_USE_LEVEL", {"HIPSPARSE_SOLVE_POLICY_USE_LEVEL", CONV_NUMERIC_LITERAL, API_SPARSE}},
|
||||
|
||||
{"cusparseStatus_t", {"hipsparseMatrixType_t", CONV_TYPE, API_SPARSE}},
|
||||
{"cusparseStatus_t", {"hipsparseStatus_t", CONV_TYPE, API_SPARSE}},
|
||||
{"CUSPARSE_STATUS_SUCCESS", {"HIPSPARSE_STATUS_SUCCESS", CONV_NUMERIC_LITERAL, API_SPARSE}},
|
||||
{"CUSPARSE_STATUS_NOT_INITIALIZED", {"HIPSPARSE_STATUS_NOT_INITIALIZED", CONV_NUMERIC_LITERAL, API_SPARSE}},
|
||||
{"CUSPARSE_STATUS_ALLOC_FAILED", {"HIPSPARSE_STATUS_ALLOC_FAILED", CONV_NUMERIC_LITERAL, API_SPARSE}},
|
||||
|
||||
@@ -0,0 +1,367 @@
|
||||
// RUN: %run_test hipify "%s" "%t" %cuda_args
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
// CHECK: #include <hip/hip_runtime.h>
|
||||
#include <cuda_runtime.h>
|
||||
// CHECK: #include "hipsparse.h"
|
||||
#include "cusparse.h"
|
||||
|
||||
// CHECK: if (y) hipFree(y);
|
||||
// CHECK: if (z) hipFree(z);
|
||||
// CHECK: if (xInd) hipFree(xInd);
|
||||
// CHECK: if (xVal) hipFree(xVal);
|
||||
// CHECK: if (csrRowPtr) hipFree(csrRowPtr);
|
||||
// CHECK: if (cooRowIndex) hipFree(cooRowIndex);
|
||||
// CHECK: if (cooColIndex) hipFree(cooColIndex);
|
||||
// CHECK: if (cooVal) hipFree(cooVal);
|
||||
// CHECK: if (descr) hipsparseDestroyMatDescr(descr);
|
||||
// CHECK: if (handle) hipsparseDestroy(handle);
|
||||
// CHECK: hipDeviceReset();
|
||||
#define CLEANUP(s) \
|
||||
do { \
|
||||
printf ("%s\n", s); \
|
||||
if (yHostPtr) free(yHostPtr); \
|
||||
if (zHostPtr) free(zHostPtr); \
|
||||
if (xIndHostPtr) free(xIndHostPtr); \
|
||||
if (xValHostPtr) free(xValHostPtr); \
|
||||
if (cooRowIndexHostPtr) free(cooRowIndexHostPtr);\
|
||||
if (cooColIndexHostPtr) free(cooColIndexHostPtr);\
|
||||
if (cooValHostPtr) free(cooValHostPtr); \
|
||||
if (y) cudaFree(y); \
|
||||
if (z) cudaFree(z); \
|
||||
if (xInd) cudaFree(xInd); \
|
||||
if (xVal) cudaFree(xVal); \
|
||||
if (csrRowPtr) cudaFree(csrRowPtr); \
|
||||
if (cooRowIndex) cudaFree(cooRowIndex); \
|
||||
if (cooColIndex) cudaFree(cooColIndex); \
|
||||
if (cooVal) cudaFree(cooVal); \
|
||||
if (descr) cusparseDestroyMatDescr(descr);\
|
||||
if (handle) cusparseDestroy(handle); \
|
||||
cudaDeviceReset(); \
|
||||
fflush (stdout); \
|
||||
} while (0)
|
||||
|
||||
int main(){
|
||||
// CHECK: hipError_t cudaStat1,cudaStat2,cudaStat3,cudaStat4,cudaStat5,cudaStat6;
|
||||
cudaError_t cudaStat1,cudaStat2,cudaStat3,cudaStat4,cudaStat5,cudaStat6;
|
||||
// CHECK: hipsparseStatus_t status;
|
||||
cusparseStatus_t status;
|
||||
// CHECK: hipsparseHandle_t handle=0;
|
||||
cusparseHandle_t handle=0;
|
||||
// CHECK: hipsparseMatDescr_t descr=0;
|
||||
cusparseMatDescr_t descr=0;
|
||||
int * cooRowIndexHostPtr=0;
|
||||
int * cooColIndexHostPtr=0;
|
||||
double * cooValHostPtr=0;
|
||||
int * cooRowIndex=0;
|
||||
int * cooColIndex=0;
|
||||
double * cooVal=0;
|
||||
int * xIndHostPtr=0;
|
||||
double * xValHostPtr=0;
|
||||
double * yHostPtr=0;
|
||||
int * xInd=0;
|
||||
double * xVal=0;
|
||||
double * y=0;
|
||||
int * csrRowPtr=0;
|
||||
double * zHostPtr=0;
|
||||
double * z=0;
|
||||
int n, nnz, nnz_vector;
|
||||
double dzero =0.0;
|
||||
double dtwo =2.0;
|
||||
double dthree=3.0;
|
||||
double dfive =5.0;
|
||||
printf("testing example\n");
|
||||
/* create the following sparse test matrix in COO format */
|
||||
/* |1.0 2.0 3.0|
|
||||
| 4.0 |
|
||||
|5.0 6.0 7.0|
|
||||
| 8.0 9.0| */
|
||||
n=4; nnz=9;
|
||||
cooRowIndexHostPtr = (int *) malloc(nnz*sizeof(cooRowIndexHostPtr[0]));
|
||||
cooColIndexHostPtr = (int *) malloc(nnz*sizeof(cooColIndexHostPtr[0]));
|
||||
cooValHostPtr = (double *)malloc(nnz*sizeof(cooValHostPtr[0]));
|
||||
if ((!cooRowIndexHostPtr) || (!cooColIndexHostPtr) || (!cooValHostPtr)){
|
||||
CLEANUP("Host malloc failed (matrix)");
|
||||
return 1;
|
||||
}
|
||||
cooRowIndexHostPtr[0]=0; cooColIndexHostPtr[0]=0; cooValHostPtr[0]=1.0;
|
||||
cooRowIndexHostPtr[1]=0; cooColIndexHostPtr[1]=2; cooValHostPtr[1]=2.0;
|
||||
cooRowIndexHostPtr[2]=0; cooColIndexHostPtr[2]=3; cooValHostPtr[2]=3.0;
|
||||
cooRowIndexHostPtr[3]=1; cooColIndexHostPtr[3]=1; cooValHostPtr[3]=4.0;
|
||||
cooRowIndexHostPtr[4]=2; cooColIndexHostPtr[4]=0; cooValHostPtr[4]=5.0;
|
||||
cooRowIndexHostPtr[5]=2; cooColIndexHostPtr[5]=2; cooValHostPtr[5]=6.0;
|
||||
cooRowIndexHostPtr[6]=2; cooColIndexHostPtr[6]=3; cooValHostPtr[6]=7.0;
|
||||
cooRowIndexHostPtr[7]=3; cooColIndexHostPtr[7]=1; cooValHostPtr[7]=8.0;
|
||||
cooRowIndexHostPtr[8]=3; cooColIndexHostPtr[8]=3; cooValHostPtr[8]=9.0;
|
||||
nnz_vector = 3;
|
||||
xIndHostPtr = (int *) malloc(nnz_vector*sizeof(xIndHostPtr[0]));
|
||||
xValHostPtr = (double *)malloc(nnz_vector*sizeof(xValHostPtr[0]));
|
||||
yHostPtr = (double *)malloc(2*n *sizeof(yHostPtr[0]));
|
||||
zHostPtr = (double *)malloc(2*(n+1) *sizeof(zHostPtr[0]));
|
||||
if((!xIndHostPtr) || (!xValHostPtr) || (!yHostPtr) || (!zHostPtr)) {
|
||||
CLEANUP("Host malloc failed (vectors)");
|
||||
return 1;
|
||||
}
|
||||
yHostPtr[0] = 10.0;
|
||||
xIndHostPtr[0]=0;
|
||||
xValHostPtr[0]=100.0;
|
||||
yHostPtr[1] = 20.0;
|
||||
xIndHostPtr[1]=1;
|
||||
xValHostPtr[1]=200.0;
|
||||
yHostPtr[2] = 30.0;
|
||||
yHostPtr[3] = 40.0;
|
||||
xIndHostPtr[2]=3;
|
||||
xValHostPtr[2]=400.0;
|
||||
yHostPtr[4] = 50.0;
|
||||
yHostPtr[5] = 60.0;
|
||||
yHostPtr[6] = 70.0;
|
||||
yHostPtr[7] = 80.0;
|
||||
/* allocate GPU memory and copy the matrix and vectors into it */
|
||||
// CHECK: cudaStat1 = hipMalloc((void**)&cooRowIndex,nnz*sizeof(cooRowIndex[0]));
|
||||
cudaStat1 = cudaMalloc((void**)&cooRowIndex,nnz*sizeof(cooRowIndex[0]));
|
||||
// CHECK: cudaStat2 = hipMalloc((void**)&cooColIndex,nnz*sizeof(cooColIndex[0]));
|
||||
cudaStat2 = cudaMalloc((void**)&cooColIndex,nnz*sizeof(cooColIndex[0]));
|
||||
// CHECK: cudaStat3 = hipMalloc((void**)&cooVal, nnz*sizeof(cooVal[0]));
|
||||
cudaStat3 = cudaMalloc((void**)&cooVal, nnz*sizeof(cooVal[0]));
|
||||
// CHECK: cudaStat4 = hipMalloc((void**)&y, 2*n*sizeof(y[0]));
|
||||
cudaStat4 = cudaMalloc((void**)&y, 2*n*sizeof(y[0]));
|
||||
// CHECK: cudaStat5 = hipMalloc((void**)&xInd,nnz_vector*sizeof(xInd[0]));
|
||||
cudaStat5 = cudaMalloc((void**)&xInd,nnz_vector*sizeof(xInd[0]));
|
||||
// CHECK: cudaStat6 = hipMalloc((void**)&xVal,nnz_vector*sizeof(xVal[0]));
|
||||
cudaStat6 = cudaMalloc((void**)&xVal,nnz_vector*sizeof(xVal[0]));
|
||||
// CHECK: if ((cudaStat1 != hipSuccess) ||
|
||||
// CHECK: (cudaStat2 != hipSuccess) ||
|
||||
// CHECK: (cudaStat3 != hipSuccess) ||
|
||||
// CHECK: (cudaStat4 != hipSuccess) ||
|
||||
// CHECK: (cudaStat5 != hipSuccess) ||
|
||||
// CHECK: (cudaStat6 != hipSuccess)) {
|
||||
if ((cudaStat1 != cudaSuccess) ||
|
||||
(cudaStat2 != cudaSuccess) ||
|
||||
(cudaStat3 != cudaSuccess) ||
|
||||
(cudaStat4 != cudaSuccess) ||
|
||||
(cudaStat5 != cudaSuccess) ||
|
||||
(cudaStat6 != cudaSuccess)) {
|
||||
CLEANUP("Device malloc failed");
|
||||
return 1;
|
||||
}
|
||||
// CHECK: cudaStat1 = hipMemcpy(cooRowIndex, cooRowIndexHostPtr,
|
||||
// CHECK: hipMemcpyHostToDevice);
|
||||
cudaStat1 = cudaMemcpy(cooRowIndex, cooRowIndexHostPtr,
|
||||
(size_t)(nnz*sizeof(cooRowIndex[0])),
|
||||
cudaMemcpyHostToDevice);
|
||||
// CHECK: cudaStat2 = hipMemcpy(cooColIndex, cooColIndexHostPtr,
|
||||
// CHECK: hipMemcpyHostToDevice);
|
||||
cudaStat2 = cudaMemcpy(cooColIndex, cooColIndexHostPtr,
|
||||
(size_t)(nnz*sizeof(cooColIndex[0])),
|
||||
cudaMemcpyHostToDevice);
|
||||
// CHECK: cudaStat3 = hipMemcpy(cooVal, cooValHostPtr,
|
||||
// CHECK: hipMemcpyHostToDevice);
|
||||
cudaStat3 = cudaMemcpy(cooVal, cooValHostPtr,
|
||||
(size_t)(nnz*sizeof(cooVal[0])),
|
||||
cudaMemcpyHostToDevice);
|
||||
// CHECK: cudaStat4 = hipMemcpy(y, yHostPtr,
|
||||
// CHECK: hipMemcpyHostToDevice);
|
||||
cudaStat4 = cudaMemcpy(y, yHostPtr,
|
||||
(size_t)(2*n*sizeof(y[0])),
|
||||
cudaMemcpyHostToDevice);
|
||||
// CHECK: cudaStat5 = hipMemcpy(xInd, xIndHostPtr,
|
||||
// CHECK: hipMemcpyHostToDevice);
|
||||
cudaStat5 = cudaMemcpy(xInd, xIndHostPtr,
|
||||
(size_t)(nnz_vector*sizeof(xInd[0])),
|
||||
cudaMemcpyHostToDevice);
|
||||
// CHECK: cudaStat6 = hipMemcpy(xVal, xValHostPtr,
|
||||
// CHECK: hipMemcpyHostToDevice);
|
||||
cudaStat6 = cudaMemcpy(xVal, xValHostPtr,
|
||||
(size_t)(nnz_vector*sizeof(xVal[0])),
|
||||
cudaMemcpyHostToDevice);
|
||||
// CHECK: if ((cudaStat1 != hipSuccess) ||
|
||||
// CHECK: (cudaStat2 != hipSuccess) ||
|
||||
// CHECK: (cudaStat3 != hipSuccess) ||
|
||||
// CHECK: (cudaStat4 != hipSuccess) ||
|
||||
// CHECK: (cudaStat5 != hipSuccess) ||
|
||||
// CHECK: (cudaStat6 != hipSuccess)) {
|
||||
if ((cudaStat1 != cudaSuccess) ||
|
||||
(cudaStat2 != cudaSuccess) ||
|
||||
(cudaStat3 != cudaSuccess) ||
|
||||
(cudaStat4 != cudaSuccess) ||
|
||||
(cudaStat5 != cudaSuccess) ||
|
||||
(cudaStat6 != cudaSuccess)) {
|
||||
CLEANUP("Memcpy from Host to Device failed");
|
||||
return 1;
|
||||
}
|
||||
/* initialize cusparse library */
|
||||
// CHECK: status= hipsparseCreate(&handle);
|
||||
status= cusparseCreate(&handle);
|
||||
// CHECK: if (status != HIPSPARSE_STATUS_SUCCESS) {
|
||||
if (status != CUSPARSE_STATUS_SUCCESS) {
|
||||
CLEANUP("CUSPARSE Library initialization failed");
|
||||
return 1;
|
||||
}
|
||||
/* create and setup matrix descriptor */
|
||||
// CHECK: status= hipsparseCreateMatDescr(&descr);
|
||||
status= cusparseCreateMatDescr(&descr);
|
||||
// CHECK: if (status != HIPSPARSE_STATUS_SUCCESS) {
|
||||
if (status != CUSPARSE_STATUS_SUCCESS) {
|
||||
CLEANUP("Matrix descriptor initialization failed");
|
||||
return 1;
|
||||
}
|
||||
// CHECK: hipsparseSetMatType(descr,HIPSPARSE_MATRIX_TYPE_GENERAL);
|
||||
cusparseSetMatType(descr,CUSPARSE_MATRIX_TYPE_GENERAL);
|
||||
// CHECK: hipsparseSetMatIndexBase(descr,HIPSPARSE_INDEX_BASE_ZERO);
|
||||
cusparseSetMatIndexBase(descr,CUSPARSE_INDEX_BASE_ZERO);
|
||||
/* exercise conversion routines (convert matrix from COO 2 CSR format) */
|
||||
// CHECK: cudaStat1 = hipMalloc((void**)&csrRowPtr,(n+1)*sizeof(csrRowPtr[0]));
|
||||
cudaStat1 = cudaMalloc((void**)&csrRowPtr,(n+1)*sizeof(csrRowPtr[0]));
|
||||
// CHECK: if (cudaStat1 != hipSuccess) {
|
||||
if (cudaStat1 != cudaSuccess) {
|
||||
CLEANUP("Device malloc failed (csrRowPtr)");
|
||||
return 1;
|
||||
}
|
||||
status= cusparseXcoo2csr(handle,cooRowIndex,nnz,n,
|
||||
// CHECK: csrRowPtr,HIPSPARSE_INDEX_BASE_ZERO);
|
||||
csrRowPtr,CUSPARSE_INDEX_BASE_ZERO);
|
||||
// CHECK: if (status != HIPSPARSE_STATUS_SUCCESS) {
|
||||
if (status != CUSPARSE_STATUS_SUCCESS) {
|
||||
CLEANUP("Conversion from COO to CSR format failed");
|
||||
return 1;
|
||||
}
|
||||
//csrRowPtr = [0 3 4 7 9]
|
||||
// The following test only works for compute capability 1.3 and above
|
||||
// because it needs double precision.
|
||||
int devId;
|
||||
// CHECK: hipDeviceProp_t prop;
|
||||
cudaDeviceProp prop;
|
||||
// CHECK: hipError_t cudaStat;
|
||||
cudaError_t cudaStat;
|
||||
// CHECK: cudaStat = hipGetDevice(&devId);
|
||||
cudaStat = cudaGetDevice(&devId);
|
||||
// CHECK: if (hipSuccess != cudaStat){
|
||||
if (cudaSuccess != cudaStat){
|
||||
// CLEANUP("hipGetDevice failed");
|
||||
CLEANUP("cudaGetDevice failed");
|
||||
// printf("Error: cudaStat %d, %s\n", cudaStat, hipGetErrorString(cudaStat));
|
||||
printf("Error: cudaStat %d, %s\n", cudaStat, cudaGetErrorString(cudaStat));
|
||||
return 1;
|
||||
}
|
||||
// CHECK: cudaStat = hipGetDeviceProperties( &prop, devId);
|
||||
cudaStat = cudaGetDeviceProperties( &prop, devId);
|
||||
// CHECK: if (hipSuccess != cudaStat) {
|
||||
if (cudaSuccess != cudaStat) {
|
||||
// CHECK: CLEANUP("hipGetDeviceProperties failed");
|
||||
CLEANUP("cudaGetDeviceProperties failed");
|
||||
// CHECK: printf("Error: cudaStat %d, %s\n", cudaStat, hipGetErrorString(cudaStat));
|
||||
printf("Error: cudaStat %d, %s\n", cudaStat, cudaGetErrorString(cudaStat));
|
||||
return 1;
|
||||
}
|
||||
int cc = 100*prop.major + 10*prop.minor;
|
||||
if (cc < 130){
|
||||
CLEANUP("waive the test because only sm13 and above are supported\n");
|
||||
printf("the device has compute capability %d\n", cc);
|
||||
printf("example test WAIVED");
|
||||
return 2;
|
||||
}
|
||||
/* exercise Level 1 routines (scatter vector elements) */
|
||||
// TODO: status= hipsparseDsctr(handle, nnz_vector, xVal, xInd,
|
||||
// CHECK: &y[n], HIPSPARSE_INDEX_BASE_ZERO);
|
||||
status= cusparseDsctr(handle, nnz_vector, xVal, xInd,
|
||||
&y[n], CUSPARSE_INDEX_BASE_ZERO);
|
||||
// CHECK: if (status != HIPSPARSE_STATUS_SUCCESS) {
|
||||
if (status != CUSPARSE_STATUS_SUCCESS) {
|
||||
CLEANUP("Scatter from sparse to dense vector failed");
|
||||
return 1;
|
||||
}
|
||||
//y = [10 20 30 40 | 100 200 70 400]
|
||||
/* exercise Level 2 routines (csrmv) */
|
||||
// CHECK: status= hipsparseDcsrmv(handle,HIPSPARSE_OPERATION_NON_TRANSPOSE, n, n, nnz,
|
||||
status= cusparseDcsrmv(handle,CUSPARSE_OPERATION_NON_TRANSPOSE, n, n, nnz,
|
||||
&dtwo, descr, cooVal, csrRowPtr, cooColIndex,
|
||||
&y[0], &dthree, &y[n]);
|
||||
// CHECK: if (status != HIPSPARSE_STATUS_SUCCESS) {
|
||||
if (status != CUSPARSE_STATUS_SUCCESS) {
|
||||
CLEANUP("Matrix-vector multiplication failed");
|
||||
return 1;
|
||||
}
|
||||
//y = [10 20 30 40 | 680 760 1230 2240]
|
||||
// CHECK: hipMemcpy(yHostPtr, y, (size_t)(2*n*sizeof(y[0])), hipMemcpyDeviceToHost);
|
||||
cudaMemcpy(yHostPtr, y, (size_t)(2*n*sizeof(y[0])), cudaMemcpyDeviceToHost);
|
||||
/* exercise Level 3 routines (csrmm) */
|
||||
// cudaStat1 = hipMalloc((void**)&z, 2*(n+1)*sizeof(z[0]));
|
||||
cudaStat1 = cudaMalloc((void**)&z, 2*(n+1)*sizeof(z[0]));
|
||||
// CHECK: if (cudaStat1 != hipSuccess) {
|
||||
if (cudaStat1 != cudaSuccess) {
|
||||
CLEANUP("Device malloc failed (z)");
|
||||
return 1;
|
||||
}
|
||||
// CHECK: cudaStat1 = hipMemset((void *)z,0, 2*(n+1)*sizeof(z[0]));
|
||||
cudaStat1 = cudaMemset((void *)z,0, 2*(n+1)*sizeof(z[0]));
|
||||
// CHECK: if (cudaStat1 != hipSuccess) {
|
||||
if (cudaStat1 != cudaSuccess) {
|
||||
CLEANUP("Memset on Device failed");
|
||||
return 1;
|
||||
}
|
||||
// TODO: status= hipsparseDcsrmm(handle, HIPSPARSE_OPERATION_NON_TRANSPOSE, n, 2, n,
|
||||
status= cusparseDcsrmm(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, n, 2, n,
|
||||
nnz, &dfive, descr, cooVal, csrRowPtr, cooColIndex,
|
||||
y, n, &dzero, z, n+1);
|
||||
// CHECK: if (status != HIPSPARSE_STATUS_SUCCESS) {
|
||||
if (status != CUSPARSE_STATUS_SUCCESS) {
|
||||
CLEANUP("Matrix-matrix multiplication failed");
|
||||
return 1;
|
||||
}
|
||||
/* print final results (z) */
|
||||
// CHECK: cudaStat1 = hipMemcpy(zHostPtr, z,
|
||||
// CHECK: hipMemcpyDeviceToHost);
|
||||
cudaStat1 = cudaMemcpy(zHostPtr, z,
|
||||
(size_t)(2*(n+1)*sizeof(z[0])),
|
||||
cudaMemcpyDeviceToHost);
|
||||
// CHECK: if (cudaStat1 != hipSuccess) {
|
||||
if (cudaStat1 != cudaSuccess) {
|
||||
CLEANUP("Memcpy from Device to Host failed");
|
||||
return 1;
|
||||
}
|
||||
//z = [950 400 2550 2600 0 | 49300 15200 132300 131200 0]
|
||||
/* destroy matrix descriptor */
|
||||
// status = hipsparseDestroyMatDescr(descr);
|
||||
status = cusparseDestroyMatDescr(descr);
|
||||
descr = 0;
|
||||
// CHECK: if (status != HIPSPARSE_STATUS_SUCCESS) {
|
||||
if (status != CUSPARSE_STATUS_SUCCESS) {
|
||||
CLEANUP("Matrix descriptor destruction failed");
|
||||
return 1;
|
||||
}
|
||||
/* destroy handle */
|
||||
// CHECK: status = hipsparseDestroy(handle);
|
||||
status = cusparseDestroy(handle);
|
||||
handle = 0;
|
||||
// CHECK: if (status != HIPSPARSE_STATUS_SUCCESS) {
|
||||
if (status != CUSPARSE_STATUS_SUCCESS) {
|
||||
CLEANUP("CUSPARSE Library release of resources failed");
|
||||
return 1;
|
||||
}
|
||||
/* check the results */
|
||||
// Notice that CLEANUP() contains a call to cusparseDestroy(handle)
|
||||
if ((zHostPtr[0] != 950.0) ||
|
||||
(zHostPtr[1] != 400.0) ||
|
||||
(zHostPtr[2] != 2550.0) ||
|
||||
(zHostPtr[3] != 2600.0) ||
|
||||
(zHostPtr[4] != 0.0) ||
|
||||
(zHostPtr[5] != 49300.0) ||
|
||||
(zHostPtr[6] != 15200.0) ||
|
||||
(zHostPtr[7] != 132300.0) ||
|
||||
(zHostPtr[8] != 131200.0) ||
|
||||
(zHostPtr[9] != 0.0) ||
|
||||
(yHostPtr[0] != 10.0) ||
|
||||
(yHostPtr[1] != 20.0) ||
|
||||
(yHostPtr[2] != 30.0) ||
|
||||
(yHostPtr[3] != 40.0) ||
|
||||
(yHostPtr[4] != 680.0) ||
|
||||
(yHostPtr[5] != 760.0) ||
|
||||
(yHostPtr[6] != 1230.0) ||
|
||||
(yHostPtr[7] != 2240.0)) {
|
||||
CLEANUP("example test FAILED");
|
||||
return 1;
|
||||
} else {
|
||||
CLEANUP("example test PASSED");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,284 @@
|
||||
// RUN: %run_test hipify "%s" "%t" %cuda_args
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
// CHECK: #include <hip/hip_runtime.h>
|
||||
#include <cuda_runtime.h>
|
||||
// CHECK: #include <hipblas.h>
|
||||
#include <cublas_v2.h>
|
||||
// CHECK: #include "hipsparse.h"
|
||||
#include "cusparse.h"
|
||||
|
||||
void printMatrix(int m, int n, const double*A, int lda, const char* name)
|
||||
{
|
||||
for(int row = 0 ; row < m ; row++){
|
||||
for(int col = 0 ; col < n ; col++){
|
||||
double Areg = A[row + col*lda];
|
||||
printf("%s(%d,%d) = %f\n", name, row+1, col+1, Areg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char*argv[])
|
||||
{
|
||||
// CHECK: hipblasHandle_t cublasH = NULL;
|
||||
cublasHandle_t cublasH = NULL;
|
||||
// CHECK: hipsparseHandle_t cusparseH = NULL;
|
||||
cusparseHandle_t cusparseH = NULL;
|
||||
// CHECK: hipStream_t stream = NULL;
|
||||
cudaStream_t stream = NULL;
|
||||
// CHECK: hipsparseMatDescr_t descrA = NULL;
|
||||
cusparseMatDescr_t descrA = NULL;
|
||||
// CHECK: hipblasStatus_t cublasStat = HIPBLAS_STATUS_SUCCESS;
|
||||
cublasStatus_t cublasStat = CUBLAS_STATUS_SUCCESS;
|
||||
// CHECK: hipsparseStatus_t cusparseStat = HIPSPARSE_STATUS_SUCCESS;
|
||||
cusparseStatus_t cusparseStat = CUSPARSE_STATUS_SUCCESS;
|
||||
// CHECK: hipError_t cudaStat1 = hipSuccess;
|
||||
// CHECK: hipError_t cudaStat2 = hipSuccess;
|
||||
// CHECK: hipError_t cudaStat3 = hipSuccess;
|
||||
// CHECK: hipError_t cudaStat4 = hipSuccess;
|
||||
// CHECK: hipError_t cudaStat5 = hipSuccess;
|
||||
cudaError_t cudaStat1 = cudaSuccess;
|
||||
cudaError_t cudaStat2 = cudaSuccess;
|
||||
cudaError_t cudaStat3 = cudaSuccess;
|
||||
cudaError_t cudaStat4 = cudaSuccess;
|
||||
cudaError_t cudaStat5 = cudaSuccess;
|
||||
const int n = 4;
|
||||
const int nnzA = 9;
|
||||
/*
|
||||
* | 1 0 2 3 |
|
||||
* | 0 4 0 0 |
|
||||
* A = | 5 0 6 7 |
|
||||
* | 0 8 0 9 |
|
||||
*
|
||||
* eigevales are { -0.5311, 7.5311, 9.0000, 4.0000 }
|
||||
*
|
||||
* The largest eigenvaluse is 9 and corresponding eigenvector is
|
||||
*
|
||||
* | 0.3029 |
|
||||
* v = | 0 |
|
||||
* | 0.9350 |
|
||||
* | 0.1844 |
|
||||
*/
|
||||
const int csrRowPtrA[n+1] = { 0, 3, 4, 7, 9 };
|
||||
const int csrColIndA[nnzA] = {0, 2, 3, 1, 0, 2, 3, 1, 3 };
|
||||
const double csrValA[nnzA] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0 };
|
||||
const double lambda_exact[n] = { 9.0000, 7.5311, 4.0000, -0.5311 };
|
||||
const double x0[n] = {1.0, 2.0, 3.0, 4.0 }; /* initial guess */
|
||||
double x[n]; /* numerical eigenvector */
|
||||
|
||||
int *d_csrRowPtrA = NULL;
|
||||
int *d_csrColIndA = NULL;
|
||||
double *d_csrValA = NULL;
|
||||
|
||||
double *d_x = NULL; /* eigenvector */
|
||||
double *d_y = NULL; /* workspace */
|
||||
|
||||
const double tol = 1.e-6;
|
||||
const int max_ites = 30;
|
||||
|
||||
const double h_one = 1.0;
|
||||
const double h_zero = 0.0;
|
||||
|
||||
printf("example of csrmv_mp \n");
|
||||
printf("tol = %E \n", tol);
|
||||
printf("max. iterations = %d \n", max_ites);
|
||||
|
||||
printf("1st eigenvaluse is %f\n", lambda_exact[0] );
|
||||
printf("2nd eigenvaluse is %f\n", lambda_exact[1] );
|
||||
|
||||
double alpha = lambda_exact[1]/lambda_exact[0] ;
|
||||
printf("convergence rate is %f\n", alpha );
|
||||
|
||||
double est_iterations = log(tol)/log(alpha);
|
||||
printf("# of iterations required is %d\n", (int)ceil(est_iterations));
|
||||
|
||||
// step 1: create cublas/cusparse handle, bind a stream
|
||||
// CHECK: cudaStat1 = hipStreamCreateWithFlags(&stream, hipStreamNonBlocking);
|
||||
cudaStat1 = cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cublasStat = hipblasCreate(&cublasH);
|
||||
cublasStat = cublasCreate(&cublasH);
|
||||
// CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat);
|
||||
assert(CUBLAS_STATUS_SUCCESS == cublasStat);
|
||||
// CHECK: cublasStat = hipblasSetStream(cublasH, stream);
|
||||
cublasStat = cublasSetStream(cublasH, stream);
|
||||
// CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat);
|
||||
assert(CUBLAS_STATUS_SUCCESS == cublasStat);
|
||||
// CHECK: cusparseStat = hipsparseCreate(&cusparseH);
|
||||
cusparseStat = cusparseCreate(&cusparseH);
|
||||
// CHECK: assert(HIPSPARSE_STATUS_SUCCESS == cusparseStat);
|
||||
assert(CUSPARSE_STATUS_SUCCESS == cusparseStat);
|
||||
// CHECK: cusparseStat = hipsparseSetStream(cusparseH, stream);
|
||||
cusparseStat = cusparseSetStream(cusparseH, stream);
|
||||
// CHECK: assert(HIPSPARSE_STATUS_SUCCESS == cusparseStat);
|
||||
assert(CUSPARSE_STATUS_SUCCESS == cusparseStat);
|
||||
|
||||
// step 2: configuration of matrix A
|
||||
// cusparseStat = hipsparseCreateMatDescr(&descrA);
|
||||
cusparseStat = cusparseCreateMatDescr(&descrA);
|
||||
// assert(HIPSPARSE_STATUS_SUCCESS == cusparseStat);
|
||||
assert(CUSPARSE_STATUS_SUCCESS == cusparseStat);
|
||||
// CHECK: hipsparseSetMatIndexBase(descrA,HIPSPARSE_INDEX_BASE_ZERO);
|
||||
cusparseSetMatIndexBase(descrA,CUSPARSE_INDEX_BASE_ZERO);
|
||||
// CHECK: hipsparseSetMatType(descrA, HIPSPARSE_MATRIX_TYPE_GENERAL );
|
||||
cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL );
|
||||
|
||||
// step 3: copy A and x0 to device
|
||||
// CHECK: cudaStat1 = hipMalloc ((void**)&d_csrRowPtrA, sizeof(int) * (n+1) );
|
||||
cudaStat1 = cudaMalloc ((void**)&d_csrRowPtrA, sizeof(int) * (n+1) );
|
||||
// CHECK: cudaStat2 = hipMalloc ((void**)&d_csrColIndA, sizeof(int) * nnzA );
|
||||
cudaStat2 = cudaMalloc ((void**)&d_csrColIndA, sizeof(int) * nnzA );
|
||||
// CHECK: cudaStat3 = hipMalloc ((void**)&d_csrValA , sizeof(double) * nnzA );
|
||||
cudaStat3 = cudaMalloc ((void**)&d_csrValA , sizeof(double) * nnzA );
|
||||
// CHECK: cudaStat4 = hipMalloc ((void**)&d_x , sizeof(double) * n );
|
||||
cudaStat4 = cudaMalloc ((void**)&d_x , sizeof(double) * n );
|
||||
// CHECK: cudaStat5 = hipMalloc ((void**)&d_y , sizeof(double) * n );
|
||||
cudaStat5 = cudaMalloc ((void**)&d_y , sizeof(double) * n );
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
// CHECK: assert(hipSuccess == cudaStat2);
|
||||
// CHECK: assert(hipSuccess == cudaStat3);
|
||||
// CHECK: assert(hipSuccess == cudaStat4);
|
||||
// CHECK: assert(hipSuccess == cudaStat5);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat2);
|
||||
assert(cudaSuccess == cudaStat3);
|
||||
assert(cudaSuccess == cudaStat4);
|
||||
assert(cudaSuccess == cudaStat5);
|
||||
|
||||
// CHECK: cudaStat1 = hipMemcpy(d_csrRowPtrA, csrRowPtrA, sizeof(int) * (n+1) , hipMemcpyHostToDevice);
|
||||
cudaStat1 = cudaMemcpy(d_csrRowPtrA, csrRowPtrA, sizeof(int) * (n+1) , cudaMemcpyHostToDevice);
|
||||
// CHECK: cudaStat2 = hipMemcpy(d_csrColIndA, csrColIndA, sizeof(int) * nnzA , hipMemcpyHostToDevice);
|
||||
cudaStat2 = cudaMemcpy(d_csrColIndA, csrColIndA, sizeof(int) * nnzA , cudaMemcpyHostToDevice);
|
||||
// CHECK: cudaStat3 = hipMemcpy(d_csrValA , csrValA , sizeof(double) * nnzA , hipMemcpyHostToDevice);
|
||||
cudaStat3 = cudaMemcpy(d_csrValA , csrValA , sizeof(double) * nnzA , cudaMemcpyHostToDevice);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: assert(hipSuccess == cudaStat2);
|
||||
assert(cudaSuccess == cudaStat2);
|
||||
// CHECK: assert(hipSuccess == cudaStat3);
|
||||
assert(cudaSuccess == cudaStat3);
|
||||
|
||||
// step 4: power method
|
||||
double lambda = 0.0;
|
||||
double lambda_next = 0.0;
|
||||
|
||||
// 4.1: initial guess x0
|
||||
cudaStat1 = cudaMemcpy(d_x, x0, sizeof(double) * n, cudaMemcpyHostToDevice);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
|
||||
for(int ite = 0 ; ite < max_ites ; ite++ ){
|
||||
// 4.2: normalize vector x
|
||||
// x = x / |x|
|
||||
double nrm2_x;
|
||||
// TODO: cublasStat = hipblasDnrm2_v2(cublasH,
|
||||
cublasStat = cublasDnrm2_v2(cublasH,
|
||||
n,
|
||||
d_x,
|
||||
1, // incx,
|
||||
&nrm2_x /* host pointer */
|
||||
);
|
||||
// CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat);
|
||||
assert(CUBLAS_STATUS_SUCCESS == cublasStat);
|
||||
double one_over_nrm2_x = 1.0 / nrm2_x;
|
||||
// TODO: cublasStat = hipblasDscal_v2( cublasH,
|
||||
cublasStat = cublasDscal_v2( cublasH,
|
||||
n,
|
||||
&one_over_nrm2_x, /* host pointer */
|
||||
d_x,
|
||||
1 // incx
|
||||
);
|
||||
// CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat);
|
||||
assert(CUBLAS_STATUS_SUCCESS == cublasStat);
|
||||
|
||||
// 4.3: y = A*x
|
||||
// TODO: hipsparseStat = cusparseDcsrmv_mp(cusparseH,
|
||||
// CHECK: HIPSPARSE_OPERATION_NON_TRANSPOSE
|
||||
cusparseStat = cusparseDcsrmv_mp(cusparseH,
|
||||
CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||
n,
|
||||
n,
|
||||
nnzA,
|
||||
&h_one,
|
||||
descrA,
|
||||
d_csrValA,
|
||||
d_csrRowPtrA,
|
||||
d_csrColIndA,
|
||||
d_x,
|
||||
&h_zero,
|
||||
d_y);
|
||||
// CHECK: assert(HIPSPARSE_STATUS_SUCCESS == cusparseStat);
|
||||
assert(CUSPARSE_STATUS_SUCCESS == cusparseStat);
|
||||
|
||||
// 4.4: lambda = y**T*x
|
||||
// TODO: cublasStat = hipblasDdot_v2 ( cublasH,
|
||||
cublasStat = cublasDdot_v2 ( cublasH,
|
||||
n,
|
||||
d_x,
|
||||
1, // incx,
|
||||
d_y,
|
||||
1, // incy,
|
||||
&lambda_next /* host pointer */
|
||||
);
|
||||
// CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat);
|
||||
assert(CUBLAS_STATUS_SUCCESS == cublasStat);
|
||||
|
||||
double lambda_err = fabs( lambda_next - lambda_exact[0] );
|
||||
printf("ite %d: lambda = %f, error = %E\n", ite, lambda_next, lambda_err );
|
||||
|
||||
// 4.5: check if converges
|
||||
if ( (ite > 0) &&
|
||||
fabs( lambda - lambda_next ) < tol
|
||||
){
|
||||
break; // converges
|
||||
}
|
||||
|
||||
/*
|
||||
* 4.6: x := y
|
||||
* lambda = lambda_next
|
||||
*
|
||||
* so new approximation is (lambda, x), x is not normalized.
|
||||
*/
|
||||
// CHECK: cudaStat1 = hipMemcpy(d_x, d_y, sizeof(double) * n , hipMemcpyDeviceToDevice);
|
||||
cudaStat1 = cudaMemcpy(d_x, d_y, sizeof(double) * n , cudaMemcpyDeviceToDevice);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
|
||||
lambda = lambda_next;
|
||||
}
|
||||
// step 5: report eigen-pair
|
||||
// CHECK: cudaStat1 = hipMemcpy(x, d_x, sizeof(double) * n, hipMemcpyDeviceToHost);
|
||||
cudaStat1 = cudaMemcpy(x, d_x, sizeof(double) * n, cudaMemcpyDeviceToHost);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
|
||||
printf("largest eigenvalue is %E\n", lambda );
|
||||
printf("eigenvector = (matlab base-1)\n");
|
||||
printMatrix(n, 1, x, n, "V0");
|
||||
printf("=====\n");
|
||||
|
||||
// free resources
|
||||
// CHECK: if (d_csrRowPtrA ) hipFree(d_csrRowPtrA);
|
||||
if (d_csrRowPtrA ) cudaFree(d_csrRowPtrA);
|
||||
// CHECK: if (d_csrColIndA ) hipFree(d_csrColIndA);
|
||||
if (d_csrColIndA ) cudaFree(d_csrColIndA);
|
||||
// CHECK: if (d_csrValA ) hipFree(d_csrValA);
|
||||
if (d_csrValA ) cudaFree(d_csrValA);
|
||||
// CHECK: if (d_x ) hipFree(d_x);
|
||||
if (d_x ) cudaFree(d_x);
|
||||
// CHeCK: if (d_y ) hipFree(d_y);
|
||||
if (d_y ) cudaFree(d_y);
|
||||
// CHECK: if (cublasH ) hipblasDestroy(cublasH);
|
||||
if (cublasH ) cublasDestroy(cublasH);
|
||||
// CHECK: if (cusparseH ) hipsparseDestroy(cusparseH);
|
||||
if (cusparseH ) cusparseDestroy(cusparseH);
|
||||
// CHECK: if (stream ) hipStreamDestroy(stream);
|
||||
if (stream ) cudaStreamDestroy(stream);
|
||||
// CHECK: if (descrA ) hipsparseDestroyMatDescr(descrA);
|
||||
if (descrA ) cusparseDestroyMatDescr(descrA);
|
||||
// CHECK: hipDeviceReset();
|
||||
cudaDeviceReset();
|
||||
return 0;
|
||||
}
|
||||
Yeni konuda referans
Bir kullanıcı engelle