[HIPIFY][SPARSE] Add 3 more CUDA 10.0 tests
+ lit update
+ fix typos
[ROCm/clr commit: a421aff96d]
Bu işleme şunda yer alıyor:
@@ -576,7 +576,7 @@
|
||||
|`cusparseXcoosortByColumn` |`hipsparseXcoosortByColumn` |
|
||||
|`cusparseXcsrsort_bufferSizeExt` |`hipsparseXcsrsort_bufferSizeExt` |
|
||||
|`cusparseXcsrsort` |`hipsparseXcsrsort` |
|
||||
|`cusparseScusparseXcscsort_bufferSizeExtnnz` | |
|
||||
|`cusparseXcscsort_bufferSizeExt` | |
|
||||
|`cusparseXcscsort` | |
|
||||
|`cusparseCreateCsru2csrInfo` | |
|
||||
|`cusparseDestroyCsru2csrInfo` | |
|
||||
|
||||
@@ -554,7 +554,7 @@ const std::map<llvm::StringRef, hipCounter> CUDA_SPARSE_FUNCTION_MAP{
|
||||
{"cusparseXcsrsort_bufferSizeExt", {"hipsparseXcsrsort_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE}},
|
||||
{"cusparseXcsrsort", {"hipsparseXcsrsort", CONV_LIB_FUNC, API_SPARSE}},
|
||||
|
||||
{"cusparseScusparseXcscsort_bufferSizeExtnnz", {"hipsparseXcscsort_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseXcscsort_bufferSizeExt", {"hipsparseXcscsort_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
{"cusparseXcscsort", {"hipsparseXcscsort", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
{"cusparseCreateCsru2csrInfo", {"hipsparseCreateCsru2csrInfo", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}},
|
||||
|
||||
@@ -277,8 +277,8 @@ int main(int argc, char*argv[])
|
||||
assert(CUBLAS_STATUS_SUCCESS == cublasStat);
|
||||
/* step 4: prepare workspace */
|
||||
// NOTE: CUDA 10.0
|
||||
// CHECK: status = hipsparseSgtsvInterleavedBatch_bufferSizeExt(
|
||||
status = cusparseSgtsvInterleavedBatch_bufferSizeExt(
|
||||
// TODO: status = hipsparseSgtsvInterleavedBatch_bufferSizeExt(
|
||||
status = cusparseSgtsvInterleavedBatch_bufferSizeExt(
|
||||
cusparseH,
|
||||
algo,
|
||||
n,
|
||||
@@ -299,7 +299,7 @@ int main(int argc, char*argv[])
|
||||
|
||||
/* step 5: solve Aj*xj = bj */
|
||||
// NOTE: CUDA 10.0
|
||||
// CHECK: status = hipsparseSgtsvInterleavedBatch(
|
||||
// TODO: status = hipsparseSgtsvInterleavedBatch(
|
||||
status = cusparseSgtsvInterleavedBatch(
|
||||
cusparseH,
|
||||
algo,
|
||||
|
||||
@@ -0,0 +1,414 @@
|
||||
// RUN: %run_test hipify "%s" "%t" %cuda_args
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
// CHECK: #include <hip/hip_runtime.h>
|
||||
#include <cuda_runtime.h>
|
||||
// CHECK: #include <hipsparse.h>
|
||||
#include <cusparse.h>
|
||||
// CHECK: #include <hipblas.h>
|
||||
#include <cublas_v2.h>
|
||||
|
||||
// NOTE: CUDA 10.0
|
||||
|
||||
/*
|
||||
* compute | b - A*x|_inf
|
||||
*/
|
||||
void residaul_eval(
|
||||
int n,
|
||||
const float *dl,
|
||||
const float *d,
|
||||
const float *du,
|
||||
const float *b,
|
||||
const float *x,
|
||||
float *r_nrminf_ptr)
|
||||
{
|
||||
float r_nrminf = 0;
|
||||
for (int i = 0; i < n; i++) {
|
||||
float dot = 0;
|
||||
if (i > 0) {
|
||||
dot += dl[i] * x[i - 1];
|
||||
}
|
||||
dot += d[i] * x[i];
|
||||
if (i < (n - 1)) {
|
||||
dot += du[i] * x[i + 1];
|
||||
}
|
||||
float ri = b[i] - dot;
|
||||
r_nrminf = (r_nrminf > fabs(ri)) ? r_nrminf : fabs(ri);
|
||||
}
|
||||
|
||||
*r_nrminf_ptr = r_nrminf;
|
||||
}
|
||||
|
||||
int main(int argc, char*argv[])
|
||||
{
|
||||
// CHECK: hipsparseHandle_t cusparseH = NULL;
|
||||
cusparseHandle_t cusparseH = NULL;
|
||||
// CHECK: hipblasHandle_t cublasH = NULL;
|
||||
cublasHandle_t cublasH = NULL;
|
||||
// CHECK: hipStream_t stream = NULL;
|
||||
cudaStream_t stream = NULL;
|
||||
// CHECK: hipsparseStatus_t status = HIPSPARSE_STATUS_SUCCESS;
|
||||
cusparseStatus_t status = CUSPARSE_STATUS_SUCCESS;
|
||||
// CHECK: hipblasStatus_t cublasStat = HIPBLAS_STATUS_SUCCESS;
|
||||
cublasStatus_t cublasStat = CUBLAS_STATUS_SUCCESS;
|
||||
// CHECK: hipError_t cudaStat1 = hipSuccess;
|
||||
cudaError_t cudaStat1 = cudaSuccess;
|
||||
|
||||
const int n = 3;
|
||||
const int batchSize = 2;
|
||||
/*
|
||||
* | 1 6 0 | | 1 | | -0.603960 |
|
||||
* A1 =| 4 2 7 |, b1 = | 2 |, x1 = | 0.267327 |
|
||||
* | 0 5 3 | | 3 | | 0.554455 |
|
||||
*
|
||||
* | 8 13 0 | | 4 | | -0.063291 |
|
||||
* A2 =| 11 9 14 |, b2 = | 5 |, x2 = | 0.346641 |
|
||||
* | 0 12 10 | | 6 | | 0.184031 |
|
||||
*/
|
||||
|
||||
/*
|
||||
* A = (dl, d, du), B and X are in aggregate format
|
||||
*/
|
||||
const float dl[n * batchSize] = { 0, 4, 5, 0, 11, 12 };
|
||||
const float d[n * batchSize] = { 1, 2, 3, 8, 9, 10 };
|
||||
const float du[n * batchSize] = { 6, 7, 0, 13, 14, 0 };
|
||||
const float B[n * batchSize] = { 1, 2, 3, 4, 5, 6 };
|
||||
float X[n * batchSize]; /* Xj = Aj \ Bj */
|
||||
|
||||
/* device memory
|
||||
* (d_dl0, d_d0, d_du0) is aggregate format
|
||||
* (d_dl, d_d, d_du) is interleaved format
|
||||
*/
|
||||
float *d_dl0 = NULL;
|
||||
float *d_d0 = NULL;
|
||||
float *d_du0 = NULL;
|
||||
float *d_dl = NULL;
|
||||
float *d_d = NULL;
|
||||
float *d_du = NULL;
|
||||
float *d_B = NULL;
|
||||
float *d_X = NULL;
|
||||
|
||||
size_t lworkInBytes = 0;
|
||||
char *d_work = NULL;
|
||||
|
||||
/*
|
||||
* algo = 0: cuThomas (unstable)
|
||||
* algo = 1: LU with pivoting (stable)
|
||||
* algo = 2: QR (stable)
|
||||
*/
|
||||
const int algo = 2;
|
||||
|
||||
const float h_one = 1;
|
||||
const float h_zero = 0;
|
||||
|
||||
printf("example of gtsv (interleaved format) \n");
|
||||
printf("choose algo = 0,1,2 to select different algorithms \n");
|
||||
printf("n = %d, batchSize = %d, algo = %d \n", n, batchSize, algo);
|
||||
|
||||
/* step 1: create cusparse/cublas handle, bind a stream */
|
||||
// CHECK: cudaStat1 = hipStreamCreateWithFlags(&stream, hipStreamNonBlocking);
|
||||
cudaStat1 = cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: status = hipsparseCreate(&cusparseH);
|
||||
status = cusparseCreate(&cusparseH);
|
||||
// CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status);
|
||||
assert(CUSPARSE_STATUS_SUCCESS == status);
|
||||
// CHECK: status = hipsparseSetStream(cusparseH, stream);
|
||||
status = cusparseSetStream(cusparseH, stream);
|
||||
// CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status);
|
||||
assert(CUSPARSE_STATUS_SUCCESS == status);
|
||||
// CHECK: cublasStat = hipblasCreate(&cublasH);
|
||||
cublasStat = cublasCreate(&cublasH);
|
||||
// CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat);
|
||||
assert(CUBLAS_STATUS_SUCCESS == cublasStat);
|
||||
// CHECK: cublasStat = hipblasSetStream(cublasH, stream);
|
||||
cublasStat = cublasSetStream(cublasH, stream);
|
||||
// CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat);
|
||||
assert(CUBLAS_STATUS_SUCCESS == cublasStat);
|
||||
|
||||
/* step 2: allocate device memory */
|
||||
// CHECK: cudaStat1 = hipMalloc((void**)&d_dl0, sizeof(float)*n*batchSize);
|
||||
cudaStat1 = cudaMalloc((void**)&d_dl0, sizeof(float)*n*batchSize);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMalloc((void**)&d_d0, sizeof(float)*n*batchSize);
|
||||
cudaStat1 = cudaMalloc((void**)&d_d0, sizeof(float)*n*batchSize);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMalloc((void**)&d_du0, sizeof(float)*n*batchSize);
|
||||
cudaStat1 = cudaMalloc((void**)&d_du0, sizeof(float)*n*batchSize);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMalloc((void**)&d_dl, sizeof(float)*n*batchSize);
|
||||
cudaStat1 = cudaMalloc((void**)&d_dl, sizeof(float)*n*batchSize);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMalloc((void**)&d_d, sizeof(float)*n*batchSize);
|
||||
cudaStat1 = cudaMalloc((void**)&d_d, sizeof(float)*n*batchSize);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMalloc((void**)&d_du, sizeof(float)*n*batchSize);
|
||||
cudaStat1 = cudaMalloc((void**)&d_du, sizeof(float)*n*batchSize);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMalloc((void**)&d_B, sizeof(float)*n*batchSize);
|
||||
cudaStat1 = cudaMalloc((void**)&d_B, sizeof(float)*n*batchSize);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMalloc((void**)&d_X, sizeof(float)*n*batchSize);
|
||||
cudaStat1 = cudaMalloc((void**)&d_X, sizeof(float)*n*batchSize);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
|
||||
/* step 3: prepare data in device, interleaved format */
|
||||
// CHECK: cudaStat1 = hipMemcpy(d_dl0, dl, sizeof(float)*n*batchSize, hipMemcpyHostToDevice);
|
||||
cudaStat1 = cudaMemcpy(d_dl0, dl, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMemcpy(d_d0, d, sizeof(float)*n*batchSize, hipMemcpyHostToDevice);
|
||||
cudaStat1 = cudaMemcpy(d_d0, d, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMemcpy(d_du0, du, sizeof(float)*n*batchSize, hipMemcpyHostToDevice);
|
||||
cudaStat1 = cudaMemcpy(d_du0, du, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMemcpy(d_B, B, sizeof(float)*n*batchSize, hipMemcpyHostToDevice);
|
||||
cudaStat1 = cudaMemcpy(d_B, B, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: hipDeviceSynchronize();
|
||||
cudaDeviceSynchronize();
|
||||
/* convert dl to interleaved format
|
||||
* dl = transpose(dl0)
|
||||
*/
|
||||
// CHECK: cublasStat = hipblasSgeam(
|
||||
// CHECK: HIPBLAS_OP_T,
|
||||
// CHECK: HIPBLAS_OP_T,
|
||||
cublasStat = cublasSgeam(
|
||||
cublasH,
|
||||
CUBLAS_OP_T, /* transa */
|
||||
CUBLAS_OP_T, /* transb, don't care */
|
||||
batchSize, /* number of rows of dl */
|
||||
n, /* number of columns of dl */
|
||||
&h_one,
|
||||
d_dl0, /* dl0 is n-by-batchSize */
|
||||
n, /* leading dimension of dl0 */
|
||||
&h_zero,
|
||||
NULL,
|
||||
n, /* don't care */
|
||||
d_dl, /* dl is batchSize-by-n */
|
||||
batchSize /* leading dimension of dl */
|
||||
);
|
||||
// CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat);
|
||||
assert(CUBLAS_STATUS_SUCCESS == cublasStat);
|
||||
/* convert d to interleaved format
|
||||
* d = transpose(d0)
|
||||
*/
|
||||
// CHECK: cublasStat = hipblasSgeam(
|
||||
// CHECK: HIPBLAS_OP_T
|
||||
// CHECK: HIPBLAS_OP_T
|
||||
cublasStat = cublasSgeam(
|
||||
cublasH,
|
||||
CUBLAS_OP_T, /* transa */
|
||||
CUBLAS_OP_T, /* transb, don't care */
|
||||
batchSize, /* number of rows of d */
|
||||
n, /* number of columns of d */
|
||||
&h_one,
|
||||
d_d0, /* d0 is n-by-batchSize */
|
||||
n, /* leading dimension of d0 */
|
||||
&h_zero,
|
||||
NULL,
|
||||
n, /* don't cae */
|
||||
d_d, /* d is batchSize-by-n */
|
||||
batchSize /* leading dimension of d */
|
||||
);
|
||||
// CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat);
|
||||
assert(CUBLAS_STATUS_SUCCESS == cublasStat);
|
||||
|
||||
/* convert du to interleaved format
|
||||
* du = transpose(du0)
|
||||
*/
|
||||
// CHECK: cublasStat = hipblasSgeam(
|
||||
// CHECK: HIPBLAS_OP_T
|
||||
// CHECK: HIPBLAS_OP_T
|
||||
cublasStat = cublasSgeam(
|
||||
cublasH,
|
||||
CUBLAS_OP_T, /* transa */
|
||||
CUBLAS_OP_T, /* transb, don't care */
|
||||
batchSize, /* number of rows of du */
|
||||
n, /* number of columns of du */
|
||||
&h_one,
|
||||
d_du0, /* du0 is n-by-batchSize */
|
||||
n, /* leading dimension of du0 */
|
||||
&h_zero,
|
||||
NULL,
|
||||
n, /* don't cae */
|
||||
d_du, /* du is batchSize-by-n */
|
||||
batchSize /* leading dimension of du */
|
||||
);
|
||||
// CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat);
|
||||
assert(CUBLAS_STATUS_SUCCESS == cublasStat);
|
||||
|
||||
/* convert B to interleaved format
|
||||
* X = transpose(B)
|
||||
*/
|
||||
// CHECK: cublasStat = hipblasSgeam(
|
||||
// CHECK: HIPBLAS_OP_T
|
||||
// CHECK: HIPBLAS_OP_T
|
||||
cublasStat = cublasSgeam(
|
||||
cublasH,
|
||||
CUBLAS_OP_T, /* transa */
|
||||
CUBLAS_OP_T, /* transb, don't care */
|
||||
batchSize, /* number of rows of X */
|
||||
n, /* number of columns of X */
|
||||
&h_one,
|
||||
d_B, /* B is n-by-batchSize */
|
||||
n, /* leading dimension of B */
|
||||
&h_zero,
|
||||
NULL,
|
||||
n, /* don't cae */
|
||||
d_X, /* X is batchSize-by-n */
|
||||
batchSize /* leading dimension of X */
|
||||
);
|
||||
// CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat);
|
||||
assert(CUBLAS_STATUS_SUCCESS == cublasStat);
|
||||
/* step 4: prepare workspace */
|
||||
// NOTE: CUDA 10.0
|
||||
// TODO: status = hipsparseSgtsvInterleavedBatch_bufferSizeExt(
|
||||
status = cusparseSgtsvInterleavedBatch_bufferSizeExt(
|
||||
cusparseH,
|
||||
algo,
|
||||
n,
|
||||
d_dl,
|
||||
d_d,
|
||||
d_du,
|
||||
d_X,
|
||||
batchSize,
|
||||
&lworkInBytes);
|
||||
// CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status);
|
||||
assert(CUSPARSE_STATUS_SUCCESS == status);
|
||||
|
||||
printf("lworkInBytes = %lld \n", (long long)lworkInBytes);
|
||||
// CHECK: cudaStat1 = hipMalloc((void**)&d_work, lworkInBytes);
|
||||
cudaStat1 = cudaMalloc((void**)&d_work, lworkInBytes);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
|
||||
/* step 5: solve Aj*xj = bj */
|
||||
// NOTE: CUDA 10.0
|
||||
// TODO: status = hipsparseSgtsvInterleavedBatch(
|
||||
status = cusparseSgtsvInterleavedBatch(
|
||||
cusparseH,
|
||||
algo,
|
||||
n,
|
||||
d_dl,
|
||||
d_d,
|
||||
d_du,
|
||||
d_X,
|
||||
batchSize,
|
||||
d_work);
|
||||
// CHECK: cudaStat1 = hipDeviceSynchronize();
|
||||
cudaStat1 = cudaDeviceSynchronize();
|
||||
// CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status);
|
||||
assert(CUSPARSE_STATUS_SUCCESS == status);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
|
||||
/* step 6: convert X back to aggregate format */
|
||||
/* B = transpose(X) */
|
||||
// CHECK: cublasStat = hipblasSgeam(
|
||||
// CHECK: HIPBLAS_OP_T
|
||||
// CHECK: HIPBLAS_OP_T
|
||||
cublasStat = cublasSgeam(
|
||||
cublasH,
|
||||
CUBLAS_OP_T, /* transa */
|
||||
CUBLAS_OP_T, /* transb, don't care */
|
||||
n, /* number of rows of B */
|
||||
batchSize, /* number of columns of B */
|
||||
&h_one,
|
||||
d_X, /* X is batchSize-by-n */
|
||||
batchSize, /* leading dimension of X */
|
||||
&h_zero,
|
||||
NULL,
|
||||
n, /* don't cae */
|
||||
d_B, /* B is n-by-batchSize */
|
||||
n /* leading dimension of B */
|
||||
);
|
||||
// CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat);
|
||||
assert(CUBLAS_STATUS_SUCCESS == cublasStat);
|
||||
// CHECK: hipDeviceSynchronize();
|
||||
cudaDeviceSynchronize();
|
||||
|
||||
/* step 7: residual evaluation */
|
||||
// CHECK: cudaStat1 = hipMemcpy(X, d_B, sizeof(float)*n*batchSize, hipMemcpyDeviceToHost);
|
||||
cudaStat1 = cudaMemcpy(X, d_B, sizeof(float)*n*batchSize, cudaMemcpyDeviceToHost);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: hipDeviceSynchronize();
|
||||
cudaDeviceSynchronize();
|
||||
|
||||
printf("==== x1 = inv(A1)*b1 \n");
|
||||
for (int j = 0; j < n; j++) {
|
||||
printf("x1[%d] = %f\n", j, X[j]);
|
||||
}
|
||||
|
||||
float r1_nrminf;
|
||||
residaul_eval(
|
||||
n,
|
||||
dl,
|
||||
d,
|
||||
du,
|
||||
B,
|
||||
X,
|
||||
&r1_nrminf
|
||||
);
|
||||
printf("|b1 - A1*x1| = %E\n", r1_nrminf);
|
||||
|
||||
printf("\n==== x2 = inv(A2)*b2 \n");
|
||||
for (int j = 0; j < n; j++) {
|
||||
printf("x2[%d] = %f\n", j, X[n + j]);
|
||||
}
|
||||
|
||||
float r2_nrminf;
|
||||
residaul_eval(
|
||||
n,
|
||||
dl + n,
|
||||
d + n,
|
||||
du + n,
|
||||
B + n,
|
||||
X + n,
|
||||
&r2_nrminf
|
||||
);
|
||||
printf("|b2 - A2*x2| = %E\n", r2_nrminf);
|
||||
|
||||
/* free resources */
|
||||
// CHECK: if (d_dl0) hipFree(d_dl0);
|
||||
if (d_dl0) cudaFree(d_dl0);
|
||||
// CHECK: if (d_d0) hipFree(d_d0);
|
||||
if (d_d0) cudaFree(d_d0);
|
||||
// CHECK: if (d_du0) hipFree(d_du0);
|
||||
if (d_du0) cudaFree(d_du0);
|
||||
// CHECK: if (d_dl) hipFree(d_dl);
|
||||
if (d_dl) cudaFree(d_dl);
|
||||
// CHECK: if (d_d) hipFree(d_d);
|
||||
if (d_d) cudaFree(d_d);
|
||||
// CHECK: if (d_du) hipFree(d_du);
|
||||
if (d_du) cudaFree(d_du);
|
||||
// CHECK: if (d_B) hipFree(d_B);
|
||||
if (d_B) cudaFree(d_B);
|
||||
// CHECK: if (d_X) hipFree(d_X);
|
||||
if (d_X) cudaFree(d_X);
|
||||
// CHECK: if (cusparseH) hipsparseDestroy(cusparseH);
|
||||
if (cusparseH) cusparseDestroy(cusparseH);
|
||||
// CHECK: if (cublasH) hipblasDestroy(cublasH);
|
||||
if (cublasH) cublasDestroy(cublasH);
|
||||
// CHECK: if (stream) hipStreamDestroy(stream);
|
||||
if (stream) cudaStreamDestroy(stream);
|
||||
// CHECK: hipDeviceReset();
|
||||
cudaDeviceReset();
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,507 @@
|
||||
// RUN: %run_test hipify "%s" "%t" %cuda_args
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
// CHECK: #include <hip/hip_runtime.h>
|
||||
#include <cuda_runtime.h>
|
||||
// CHECK: #include <hipsparse.h>
|
||||
#include <cusparse.h>
|
||||
// CHECK: #include <hipblas.h>
|
||||
#include <cublas_v2.h>
|
||||
|
||||
// NOTE: CUDA 10.0
|
||||
|
||||
/*
|
||||
* compute | b - A*x|_inf
|
||||
*/
|
||||
void residaul_eval(
|
||||
int n,
|
||||
const float *ds,
|
||||
const float *dl,
|
||||
const float *d,
|
||||
const float *du,
|
||||
const float *dw,
|
||||
const float *b,
|
||||
const float *x,
|
||||
float *r_nrminf_ptr)
|
||||
{
|
||||
float r_nrminf = 0;
|
||||
for (int i = 0; i < n; i++) {
|
||||
float dot = 0;
|
||||
if (i > 1) {
|
||||
dot += ds[i] * x[i - 2];
|
||||
}
|
||||
if (i > 0) {
|
||||
dot += dl[i] * x[i - 1];
|
||||
}
|
||||
dot += d[i] * x[i];
|
||||
if (i < (n - 1)) {
|
||||
dot += du[i] * x[i + 1];
|
||||
}
|
||||
if (i < (n - 2)) {
|
||||
dot += dw[i] * x[i + 2];
|
||||
}
|
||||
float ri = b[i] - dot;
|
||||
r_nrminf = (r_nrminf > fabs(ri)) ? r_nrminf : fabs(ri);
|
||||
}
|
||||
|
||||
*r_nrminf_ptr = r_nrminf;
|
||||
}
|
||||
|
||||
int main(int argc, char*argv[])
|
||||
{
|
||||
// CHECK: hipsparseHandle_t cusparseH = NULL;
|
||||
cusparseHandle_t cusparseH = NULL;
|
||||
// CHECK: hipblasHandle_t cublasH = NULL;
|
||||
cublasHandle_t cublasH = NULL;
|
||||
// CHECK: hipStream_t stream = NULL;
|
||||
cudaStream_t stream = NULL;
|
||||
// CHECK: hipsparseStatus_t status = HIPSPARSE_STATUS_SUCCESS;
|
||||
cusparseStatus_t status = CUSPARSE_STATUS_SUCCESS;
|
||||
// CHECK: hipblasStatus_t cublasStat = HIPBLAS_STATUS_SUCCESS;
|
||||
cublasStatus_t cublasStat = CUBLAS_STATUS_SUCCESS;
|
||||
// CHECK: hipError_t cudaStat1 = hipSuccess;
|
||||
cudaError_t cudaStat1 = cudaSuccess;
|
||||
|
||||
const int n = 4;
|
||||
const int batchSize = 2;
|
||||
|
||||
/*
|
||||
* | 1 8 13 0 | | 1 | | -0.0592 |
|
||||
* A1 =| 5 2 9 14 |, b1 = | 2 |, x1 = | 0.3428 |
|
||||
* | 11 6 3 10 | | 3 | | -0.1295 |
|
||||
* | 0 12 7 4 | | 4 | | 0.1982 |
|
||||
*
|
||||
* | 15 22 27 0 | | 5 | | -0.0012 |
|
||||
* A2 =| 19 16 23 28 |, b2 = | 6 |, x2 = | 0.2792 |
|
||||
* | 25 20 17 24 | | 7 | | -0.0416 |
|
||||
* | 0 26 21 18 | | 8 | | 0.0898 |
|
||||
*/
|
||||
|
||||
/*
|
||||
* A = (ds, dl, d, du, dw), B and X are in aggregate format
|
||||
*/
|
||||
const float ds[n * batchSize] = { 0, 0, 11, 12, 0, 0, 25, 26 };
|
||||
const float dl[n * batchSize] = { 0, 5, 6, 7, 0, 19, 20, 21 };
|
||||
const float d[n * batchSize] = { 1, 2, 3, 4, 15, 16, 17, 18 };
|
||||
const float du[n * batchSize] = { 8, 9, 10, 0, 22, 23, 24, 0 };
|
||||
const float dw[n * batchSize] = { 13,14, 0, 0, 27, 28, 0, 0 };
|
||||
const float B[n * batchSize] = { 1, 2, 3, 4, 5, 6, 7, 8 };
|
||||
float X[n * batchSize]; /* Xj = Aj \ Bj */
|
||||
|
||||
/* device memory
|
||||
* (d_ds0, d_dl0, d_d0, d_du0, d_dw0) is aggregate format
|
||||
* (d_ds, d_dl, d_d, d_du, d_dw) is interleaved format
|
||||
*/
|
||||
float *d_ds0 = NULL;
|
||||
float *d_dl0 = NULL;
|
||||
float *d_d0 = NULL;
|
||||
float *d_du0 = NULL;
|
||||
float *d_dw0 = NULL;
|
||||
float *d_ds = NULL;
|
||||
float *d_dl = NULL;
|
||||
float *d_d = NULL;
|
||||
float *d_du = NULL;
|
||||
float *d_dw = NULL;
|
||||
float *d_B = NULL;
|
||||
float *d_X = NULL;
|
||||
|
||||
size_t lworkInBytes = 0;
|
||||
char *d_work = NULL;
|
||||
|
||||
const float h_one = 1;
|
||||
const float h_zero = 0;
|
||||
|
||||
int algo = 0; /* QR factorization */
|
||||
|
||||
printf("example of gpsv (interleaved format) \n");
|
||||
printf("n = %d, batchSize = %d\n", n, batchSize);
|
||||
|
||||
/* step 1: create cusparse/cublas handle, bind a stream */
|
||||
// CHECK: cudaStat1 = hipStreamCreateWithFlags(&stream, hipStreamNonBlocking);
|
||||
cudaStat1 = cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: status = hipsparseCreate(&cusparseH);
|
||||
status = cusparseCreate(&cusparseH);
|
||||
// CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status);
|
||||
assert(CUSPARSE_STATUS_SUCCESS == status);
|
||||
// CHECK: status = hipsparseSetStream(cusparseH, stream);
|
||||
status = cusparseSetStream(cusparseH, stream);
|
||||
// CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status);
|
||||
assert(CUSPARSE_STATUS_SUCCESS == status);
|
||||
// CHECK: cublasStat = hipblasCreate(&cublasH);
|
||||
cublasStat = cublasCreate(cublasH);
|
||||
// CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat);
|
||||
assert(CUBLAS_STATUS_SUCCESS == cublasStat);
|
||||
// CHECK: cublasStat = hipblasSetStream(cublasH, stream);
|
||||
cublasStat = cublasSetStream(cublasH, stream);
|
||||
// CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat);
|
||||
assert(CUBLAS_STATUS_SUCCESS == cublasStat);
|
||||
/* step 2: allocate device memory */
|
||||
// CHECK: cudaStat1 = hipMalloc((void**)&d_ds0, sizeof(float)*n*batchSize);
|
||||
cudaStat1 = cudaMalloc((void**)&d_ds0, sizeof(float)*n*batchSize);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMalloc((void**)&d_dl0, sizeof(float)*n*batchSize);
|
||||
cudaStat1 = cudaMalloc((void**)&d_dl0, sizeof(float)*n*batchSize);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMalloc((void**)&d_d0, sizeof(float)*n*batchSize);
|
||||
cudaStat1 = cudaMalloc((void**)&d_d0, sizeof(float)*n*batchSize);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMalloc((void**)&d_du0, sizeof(float)*n*batchSize);
|
||||
cudaStat1 = cudaMalloc((void**)&d_du0, sizeof(float)*n*batchSize);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMalloc((void**)&d_dw0, sizeof(float)*n*batchSize);
|
||||
cudaStat1 = cudaMalloc((void**)&d_dw0, sizeof(float)*n*batchSize);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMalloc((void**)&d_ds, sizeof(float)*n*batchSize);
|
||||
cudaStat1 = cudaMalloc((void**)&d_ds, sizeof(float)*n*batchSize);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMalloc((void**)&d_dl, sizeof(float)*n*batchSize);
|
||||
cudaStat1 = cudaMalloc((void**)&d_dl, sizeof(float)*n*batchSize);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMalloc((void**)&d_d, sizeof(float)*n*batchSize);
|
||||
cudaStat1 = cudaMalloc((void**)&d_d, sizeof(float)*n*batchSize);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMalloc((void**)&d_du, sizeof(float)*n*batchSize);
|
||||
cudaStat1 = cudaMalloc((void**)&d_du, sizeof(float)*n*batchSize);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMalloc((void**)&d_dw, sizeof(float)*n*batchSize);
|
||||
cudaStat1 = cudaMalloc((void**)&d_dw, sizeof(float)*n*batchSize);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMalloc((void**)&d_B, sizeof(float)*n*batchSize);
|
||||
cudaStat1 = cudaMalloc((void**)&d_B, sizeof(float)*n*batchSize);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMalloc((void**)&d_X, sizeof(float)*n*batchSize);
|
||||
cudaStat1 = cudaMalloc((void**)&d_X, sizeof(float)*n*batchSize);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
/* step 3: prepare data in device, interleaved format */
|
||||
// CHECK: cudaStat1 = hipMemcpy(d_ds0, ds, sizeof(float)*n*batchSize, hipMemcpyHostToDevice);
|
||||
cudaStat1 = cudaMemcpy(d_ds0, ds, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMemcpy(d_dl0, dl, sizeof(float)*n*batchSize, hipMemcpyHostToDevice);
|
||||
cudaStat1 = cudaMemcpy(d_dl0, dl, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMemcpy(d_d0, d, sizeof(float)*n*batchSize, hipMemcpyHostToDevice);
|
||||
cudaStat1 = cudaMemcpy(d_d0, d, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMemcpy(d_du0, du, sizeof(float)*n*batchSize, hipMemcpyHostToDevice);
|
||||
cudaStat1 = cudaMemcpy(d_du0, du, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMemcpy(d_dw0, dw, sizeof(float)*n*batchSize, hipMemcpyHostToDevice);
|
||||
cudaStat1 = cudaMemcpy(d_dw0, dw, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMemcpy(d_B, B, sizeof(float)*n*batchSize, hipMemcpyHostToDevice);
|
||||
cudaStat1 = cudaMemcpy(d_B, B, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: hipDeviceSynchronize();
|
||||
cudaDeviceSynchronize();
|
||||
/* convert ds to interleaved format
|
||||
* ds = transpose(ds0) */
|
||||
// CHECK: cublasStat = hipblasSgeam(
|
||||
// CHECK: HIPBLAS_OP_T,
|
||||
// CHECK: HIPBLAS_OP_T,
|
||||
cublasStat = cublasSgeam(
|
||||
cublasH,
|
||||
CUBLAS_OP_T, /* transa */
|
||||
CUBLAS_OP_T, /* transb, don't care */
|
||||
batchSize, /* number of rows of ds */
|
||||
n, /* number of columns of ds */
|
||||
&h_one,
|
||||
d_ds0, /* ds0 is n-by-batchSize */
|
||||
n, /* leading dimension of ds0 */
|
||||
&h_zero,
|
||||
NULL,
|
||||
n, /* don't cae */
|
||||
d_ds, /* ds is batchSize-by-n */
|
||||
batchSize); /* leading dimension of ds */
|
||||
// CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat);
|
||||
assert(CUBLAS_STATUS_SUCCESS == cublasStat);
|
||||
/* convert dl to interleaved format
|
||||
* dl = transpose(dl0)
|
||||
*/
|
||||
// CHECK: cublasStat = hipblasSgeam(
|
||||
// CHECK: HIPBLAS_OP_T,
|
||||
// CHECK: HIPBLAS_OP_T,
|
||||
cublasStat = cublasSgeam(
|
||||
cublasH,
|
||||
CUBLAS_OP_T, /* transa */
|
||||
CUBLAS_OP_T, /* transb, don't care */
|
||||
batchSize, /* number of rows of dl */
|
||||
n, /* number of columns of dl */
|
||||
&h_one,
|
||||
d_dl0, /* dl0 is n-by-batchSize */
|
||||
n, /* leading dimension of dl0 */
|
||||
&h_zero,
|
||||
NULL,
|
||||
n, /* don't cae */
|
||||
d_dl, /* dl is batchSize-by-n */
|
||||
batchSize /* leading dimension of dl */
|
||||
);
|
||||
// CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat);
|
||||
assert(CUBLAS_STATUS_SUCCESS == cublasStat);
|
||||
|
||||
/* convert d to interleaved format
|
||||
* d = transpose(d0)
|
||||
*/
|
||||
// CHECK: cublasStat = hipblasSgeam(
|
||||
// CHECK: HIPBLAS_OP_T,
|
||||
// CHECK: HIPBLAS_OP_T,
|
||||
cublasStat = cublasSgeam(
|
||||
cublasH,
|
||||
CUBLAS_OP_T, /* transa */
|
||||
CUBLAS_OP_T, /* transb, don't care */
|
||||
batchSize, /* number of rows of d */
|
||||
n, /* number of columns of d */
|
||||
&h_one,
|
||||
d_d0, /* d0 is n-by-batchSize */
|
||||
n, /* leading dimension of d0 */
|
||||
&h_zero,
|
||||
NULL,
|
||||
n, /* don't cae */
|
||||
d_d, /* d is batchSize-by-n */
|
||||
batchSize /* leading dimension of d */
|
||||
);
|
||||
// CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat);
|
||||
assert(CUBLAS_STATUS_SUCCESS == cublasStat);
|
||||
|
||||
/* convert du to interleaved format
|
||||
* du = transpose(du0)
|
||||
*/
|
||||
// CHECK: cublasStat = hipblasSgeam(
|
||||
// CHECK: HIPBLAS_OP_T,
|
||||
// CHECK: HIPBLAS_OP_T,
|
||||
cublasStat = cublasSgeam(
|
||||
cublasH,
|
||||
CUBLAS_OP_T, /* transa */
|
||||
CUBLAS_OP_T, /* transb, don't care */
|
||||
batchSize, /* number of rows of du */
|
||||
n, /* number of columns of du */
|
||||
&h_one,
|
||||
d_du0, /* du0 is n-by-batchSize */
|
||||
n, /* leading dimension of du0 */
|
||||
&h_zero,
|
||||
NULL,
|
||||
n, /* don't cae */
|
||||
d_du, /* du is batchSize-by-n */
|
||||
batchSize /* leading dimension of du */
|
||||
);
|
||||
// CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat);
|
||||
assert(CUBLAS_STATUS_SUCCESS == cublasStat);
|
||||
/* convert dw to interleaved format
|
||||
* dw = transpose(dw0)
|
||||
*/
|
||||
// CHECK: cublasStat = hipblasSgeam(
|
||||
// CHECK: HIPBLAS_OP_T,
|
||||
// CHECK: HIPBLAS_OP_T,
|
||||
cublasStat = cublasSgeam(
|
||||
cublasH,
|
||||
CUBLAS_OP_T, /* transa */
|
||||
CUBLAS_OP_T, /* transb, don't care */
|
||||
batchSize, /* number of rows of dw */
|
||||
n, /* number of columns of dw */
|
||||
&h_one,
|
||||
d_dw0, /* dw0 is n-by-batchSize */
|
||||
n, /* leading dimension of dw0 */
|
||||
&h_zero,
|
||||
NULL,
|
||||
n, /* don't cae */
|
||||
d_dw, /* dw is batchSize-by-n */
|
||||
batchSize /* leading dimension of dw */
|
||||
);
|
||||
// CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat);
|
||||
assert(CUBLAS_STATUS_SUCCESS == cublasStat);
|
||||
|
||||
/* convert B to interleaved format
|
||||
* X = transpose(B)
|
||||
*/
|
||||
// CHECK: cublasStat = hipblasSgeam(
|
||||
// CHECK: HIPBLAS_OP_T,
|
||||
// CHECK: HIPBLAS_OP_T,
|
||||
cublasStat = cublasSgeam(
|
||||
cublasH,
|
||||
CUBLAS_OP_T, /* transa */
|
||||
CUBLAS_OP_T, /* transb, don't care */
|
||||
batchSize, /* number of rows of X */
|
||||
n, /* number of columns of X */
|
||||
&h_one,
|
||||
d_B, /* B is n-by-batchSize */
|
||||
n, /* leading dimension of B */
|
||||
&h_zero,
|
||||
NULL,
|
||||
n, /* don't cae */
|
||||
d_X, /* X is batchSize-by-n */
|
||||
batchSize /* leading dimension of X */
|
||||
);
|
||||
// CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat);
|
||||
assert(CUBLAS_STATUS_SUCCESS == cublasStat);
|
||||
|
||||
/* step 4: prepare workspace */
|
||||
// NOTE: CUDA 10.0
|
||||
// TODO: status = hipsparseSgpsvInterleavedBatch_bufferSizeExt(
|
||||
status = cusparseSgpsvInterleavedBatch_bufferSizeExt(
|
||||
cusparseH,
|
||||
algo,
|
||||
n,
|
||||
d_ds,
|
||||
d_dl,
|
||||
d_d,
|
||||
d_du,
|
||||
d_dw,
|
||||
d_X,
|
||||
batchSize,
|
||||
&lworkInBytes);
|
||||
// CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status);
|
||||
assert(CUSPARSE_STATUS_SUCCESS == status);
|
||||
|
||||
printf("lworkInBytes = %lld \n", (long long)lworkInBytes);
|
||||
// CHECK: cudaStat1 = hipMalloc((void**)&d_work, lworkInBytes);
|
||||
cudaStat1 = cudaMalloc((void**)&d_work, lworkInBytes);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
/* step 5: solve Aj*xj = bj */
|
||||
// NOTE: CUDA 10.0
|
||||
// TODO: status = hipsparseSgpsvInterleavedBatch(
|
||||
status = cusparseSgpsvInterleavedBatch(
|
||||
cusparseH,
|
||||
algo,
|
||||
n,
|
||||
d_ds,
|
||||
d_dl,
|
||||
d_d,
|
||||
d_du,
|
||||
d_dw,
|
||||
d_X,
|
||||
batchSize,
|
||||
d_work);
|
||||
// CHECK: cudaStat1 = hipDeviceSynchronize();
|
||||
cudaStat1 = cudaDeviceSynchronize();
|
||||
// CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status);
|
||||
assert(CUSPARSE_STATUS_SUCCESS == status);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
|
||||
/* step 6: convert X back to aggregate format */
|
||||
/* B = transpose(X) */
|
||||
// CHECK: cublasStat = hipblasSgeam(
|
||||
// CHECK: HIPBLAS_OP_T,
|
||||
// CHECK: HIPBLAS_OP_T,
|
||||
cublasStat = cublasSgeam(
|
||||
cublasH,
|
||||
CUBLAS_OP_T, /* transa */
|
||||
CUBLAS_OP_T, /* transb, don't care */
|
||||
n, /* number of rows of B */
|
||||
batchSize, /* number of columns of B */
|
||||
&h_one,
|
||||
d_X, /* X is batchSize-by-n */
|
||||
batchSize, /* leading dimension of X */
|
||||
&h_zero,
|
||||
NULL,
|
||||
n, /* don't cae */
|
||||
d_B, /* B is n-by-batchSize */
|
||||
n /* leading dimension of B */
|
||||
);
|
||||
// CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat);
|
||||
assert(CUBLAS_STATUS_SUCCESS == cublasStat);
|
||||
// CHECK: hipDeviceSynchronize();
|
||||
cudaDeviceSynchronize();
|
||||
|
||||
/* step 7: residual evaluation */
|
||||
// CHECK: cudaStat1 = hipMemcpy(X, d_B, sizeof(float)*n*batchSize, hipMemcpyDeviceToHost);
|
||||
cudaStat1 = cudaMemcpy(X, d_B, sizeof(float)*n*batchSize, cudaMemcpyDeviceToHost);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: hipDeviceSynchronize();
|
||||
cudaDeviceSynchronize();
|
||||
|
||||
printf("==== x1 = inv(A1)*b1 \n");
|
||||
for (int j = 0; j < n; j++) {
|
||||
printf("x1[%d] = %f\n", j, X[j]);
|
||||
}
|
||||
|
||||
float r1_nrminf;
|
||||
residaul_eval(
|
||||
n,
|
||||
ds,
|
||||
dl,
|
||||
d,
|
||||
du,
|
||||
dw,
|
||||
B,
|
||||
X,
|
||||
&r1_nrminf
|
||||
);
|
||||
printf("|b1 - A1*x1| = %E\n", r1_nrminf);
|
||||
printf("\n==== x2 = inv(A2)*b2 \n");
|
||||
for (int j = 0; j < n; j++) {
|
||||
printf("x2[%d] = %f\n", j, X[n + j]);
|
||||
}
|
||||
|
||||
float r2_nrminf;
|
||||
residaul_eval(
|
||||
n,
|
||||
ds + n,
|
||||
dl + n,
|
||||
d + n,
|
||||
du + n,
|
||||
dw + n,
|
||||
B + n,
|
||||
X + n,
|
||||
&r2_nrminf
|
||||
);
|
||||
printf("|b2 - A2*x2| = %E\n", r2_nrminf);
|
||||
|
||||
/* free resources */
|
||||
// CHECK: if (d_ds0) hipFree(d_ds0);
|
||||
if (d_ds0) cudaFree(d_ds0);
|
||||
// CHECK: if (d_dl0) hipFree(d_dl0);
|
||||
if (d_dl0) cudaFree(d_dl0);
|
||||
// CHECK: if (d_d0) hipFree(d_d0);
|
||||
if (d_d0) cudaFree(d_d0);
|
||||
// CHECK: if (d_du0) hipFree(d_du0);
|
||||
if (d_du0) cudaFree(d_du0);
|
||||
// CHECK: if (d_dw0) hipFree(d_dw0);
|
||||
if (d_dw0) cudaFree(d_dw0);
|
||||
// CHECK: if (d_ds) hipFree(d_ds);
|
||||
if (d_ds) cudaFree(d_ds);
|
||||
// CHECK: if (d_dl) hipFree(d_dl);
|
||||
if (d_dl) cudaFree(d_dl);
|
||||
// CHECK: if (d_d) hipFree(d_d);
|
||||
if (d_d) cudaFree(d_d);
|
||||
// CHECK: if (d_du) hipFree(d_du);
|
||||
if (d_du) cudaFree(d_du);
|
||||
// CHECK: if (d_dw) hipFree(d_dw);
|
||||
if (d_dw) cudaFree(d_dw);
|
||||
// CHECK: if (d_B) hipFree(d_B);
|
||||
if (d_B) cudaFree(d_B);
|
||||
// CHECK: if (d_X) hipFree(d_X);
|
||||
if (d_X) cudaFree(d_X);
|
||||
// CHECK: if (cusparseH) hipsparseDestroy(cusparseH);
|
||||
if (cusparseH) cusparseDestroy(cusparseH);
|
||||
// CHECK: if (cublasH) hipblasDestroy(cublasH);
|
||||
if (cublasH) cublasDestroy(cublasH);
|
||||
// CHECK: if (stream) hipStreamDestroy(stream);
|
||||
if (stream) cudaStreamDestroy(stream);
|
||||
// CHECK: hipDeviceReset();
|
||||
cudaDeviceReset();
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,327 @@
|
||||
// RUN: %run_test hipify "%s" "%t" %cuda_args
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
// CHECK: #include <hip/hip_runtime.h>
|
||||
#include <cuda_runtime.h>
|
||||
// CHECK: #include <hipsparse.h>
|
||||
#include <cusparse.h>
|
||||
|
||||
// NOTE: CUDA 10.0
|
||||
|
||||
/* compute | b - A*x|_inf */
|
||||
void residaul_eval(
|
||||
int n,
|
||||
// CHECK: const hipsparseMatDescr_t descrA,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const float *csrVal,
|
||||
const int *csrRowPtr,
|
||||
const int *csrColInd,
|
||||
const float *b,
|
||||
const float *x,
|
||||
float *r_nrminf_ptr)
|
||||
{
|
||||
// CHECK: const int base = (hipsparseGetMatIndexBase(descrA) != HIPSPARSE_INDEX_BASE_ONE) ? 0 : 1;
|
||||
const int base = (cusparseGetMatIndexBase(descrA) != CUSPARSE_INDEX_BASE_ONE) ? 0 : 1;
|
||||
// CHECK: const int lower = (HIPSPARSE_FILL_MODE_LOWER == hipsparseGetMatFillMode(descrA)) ? 1 : 0;
|
||||
const int lower = (CUSPARSE_FILL_MODE_LOWER == cusparseGetMatFillMode(descrA)) ? 1 : 0;
|
||||
// CHECK: const int unit = (HIPSPARSE_DIAG_TYPE_UNIT == hipsparseGetMatDiagType(descrA)) ? 1 : 0;
|
||||
const int unit = (CUSPARSE_DIAG_TYPE_UNIT == cusparseGetMatDiagType(descrA)) ? 1 : 0;
|
||||
|
||||
float r_nrminf = 0;
|
||||
for (int row = 0; row < n; row++) {
|
||||
const int start = csrRowPtr[row] - base;
|
||||
const int end = csrRowPtr[row + 1] - base;
|
||||
float dot = 0;
|
||||
for (int colidx = start; colidx < end; colidx++) {
|
||||
const int col = csrColInd[colidx] - base;
|
||||
float Aij = csrVal[colidx];
|
||||
float xj = x[col];
|
||||
if ((row == col) && unit) {
|
||||
Aij = 1.0;
|
||||
}
|
||||
int valid = (row >= col) && lower ||
|
||||
(row <= col) && !lower;
|
||||
if (valid) {
|
||||
dot += Aij * xj;
|
||||
}
|
||||
}
|
||||
float ri = b[row] - dot;
|
||||
r_nrminf = (r_nrminf > fabs(ri)) ? r_nrminf : fabs(ri);
|
||||
}
|
||||
*r_nrminf_ptr = r_nrminf;
|
||||
}
|
||||
|
||||
int main(int argc, char*argv[])
|
||||
{
|
||||
// CHECK: hipsparseHandle_t handle = NULL;
|
||||
cusparseHandle_t handle = NULL;
|
||||
// CHECK: hipStream_t stream = NULL;
|
||||
cudaStream_t stream = NULL;
|
||||
// CHECK: hipsparseMatDescr_t descrA = NULL;
|
||||
cusparseMatDescr_t descrA = NULL;
|
||||
// NOTE: CUDA 10.0
|
||||
// TODO: csrsm2Info_t info = NULL;
|
||||
csrsm2Info_t info = NULL;
|
||||
// CHECK: hipsparseStatus_t status = HIPSPARSE_STATUS_SUCCESS;
|
||||
cusparseStatus_t status = CUSPARSE_STATUS_SUCCESS;
|
||||
// CHECK: hipError_t cudaStat1 = hipSuccess;
|
||||
cudaError_t cudaStat1 = cudaSuccess;
|
||||
const int nrhs = 2;
|
||||
const int n = 4;
|
||||
const int nnzA = 9;
|
||||
// CHECK: const hipsparseSolvePolicy_t policy = HIPSPARSE_SOLVE_POLICY_NO_LEVEL;
|
||||
const cusparseSolvePolicy_t policy = CUSPARSE_SOLVE_POLICY_NO_LEVEL;
|
||||
const float h_one = 1.0;
|
||||
/*
|
||||
* | 1 0 2 -3 |
|
||||
* | 0 4 0 0 |
|
||||
* A = | 5 0 6 7 |
|
||||
* | 0 8 0 9 |
|
||||
*
|
||||
* Regard A as a lower triangle matrix L with non-unit diagonal.
|
||||
* | 1 5 | | 1 5 |
|
||||
* Given B = | 2 6 |, X = L \ B = | 0.5 1.5 |
|
||||
* | 3 7 | | -0.3333 -3 |
|
||||
* | 4 8 | | 0 -0.4444 |
|
||||
*/
|
||||
const int csrRowPtrA[n + 1] = { 1, 4, 5, 8, 10 };
|
||||
const int csrColIndA[nnzA] = { 1, 3, 4, 2, 1, 3, 4, 2, 4 };
|
||||
const float csrValA[nnzA] = { 1, 2, -3, 4, 5, 6, 7, 8, 9 };
|
||||
const float B[n*nrhs] = { 1,2,3,4,5,6,7,8 };
|
||||
float X[n*nrhs];
|
||||
|
||||
int *d_csrRowPtrA = NULL;
|
||||
int *d_csrColIndA = NULL;
|
||||
float *d_csrValA = NULL;
|
||||
float *d_B = NULL;
|
||||
|
||||
size_t lworkInBytes = 0;
|
||||
char *d_work = NULL;
|
||||
|
||||
const int algo = 0; /* non-block version */
|
||||
|
||||
printf("example of csrsm2 \n");
|
||||
|
||||
/* step 1: create cusparse handle, bind a stream */
|
||||
// CHECK: cudaStat1 = hipStreamCreateWithFlags(&stream, hipStreamNonBlocking);
|
||||
cudaStat1 = cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: status = hipsparseCreate(&handle);
|
||||
status = cusparseCreate(&handle);
|
||||
// CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status);
|
||||
assert(CUSPARSE_STATUS_SUCCESS == status);
|
||||
|
||||
status = cusparseSetStream(handle, stream);
|
||||
// CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status);
|
||||
assert(CUSPARSE_STATUS_SUCCESS == status);
|
||||
|
||||
// NOTE: CUDA 10.0
|
||||
// TODO: status = hipsparseCreateCsrsm2Info(&info);
|
||||
status = cusparseCreateCsrsm2Info(&info);
|
||||
// CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status);
|
||||
assert(CUSPARSE_STATUS_SUCCESS == status);
|
||||
|
||||
/* step 2: configuration of matrix A */
|
||||
status = cusparseCreateMatDescr(&descrA);
|
||||
// CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status);
|
||||
assert(CUSPARSE_STATUS_SUCCESS == status);
|
||||
/* A is base-1*/
|
||||
// CHECK: hipsparseSetMatIndexBase(descrA, HIPSPARSE_INDEX_BASE_ONE);
|
||||
cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ONE);
|
||||
// CHECK: hipsparseSetMatType(descrA, HIPSPARSE_MATRIX_TYPE_GENERAL);
|
||||
cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL);
|
||||
/* A is lower triangle */
|
||||
// CHECK: hipsparseSetMatFillMode(descrA, HIPSPARSE_FILL_MODE_LOWER);
|
||||
cusparseSetMatFillMode(descrA, CUSPARSE_FILL_MODE_LOWER);
|
||||
/* A has non unit diagonal */
|
||||
// CHECK: hipsparseSetMatDiagType(descrA, HIPSPARSE_DIAG_TYPE_NON_UNIT);
|
||||
cusparseSetMatDiagType(descrA, CUSPARSE_DIAG_TYPE_NON_UNIT);
|
||||
// CHECK: cudaStat1 = hipMalloc((void**)&d_csrRowPtrA, sizeof(int)*(n + 1));
|
||||
cudaStat1 = cudaMalloc((void**)&d_csrRowPtrA, sizeof(int)*(n + 1));
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMalloc((void**)&d_csrColIndA, sizeof(int)*nnzA);
|
||||
cudaStat1 = cudaMalloc((void**)&d_csrColIndA, sizeof(int)*nnzA);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMalloc((void**)&d_csrValA, sizeof(float)*nnzA);
|
||||
cudaStat1 = cudaMalloc((void**)&d_csrValA, sizeof(float)*nnzA);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMalloc((void**)&d_B, sizeof(float)*n*nrhs);
|
||||
cudaStat1 = cudaMalloc((void**)&d_B, sizeof(float)*n*nrhs);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMemcpy(d_csrRowPtrA, csrRowPtrA, sizeof(int)*(n + 1), hipMemcpyHostToDevice);
|
||||
cudaStat1 = cudaMemcpy(d_csrRowPtrA, csrRowPtrA, sizeof(int)*(n + 1), cudaMemcpyHostToDevice);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMemcpy(d_csrColIndA, csrColIndA, sizeof(int)*nnzA, hipMemcpyHostToDevice);
|
||||
cudaStat1 = cudaMemcpy(d_csrColIndA, csrColIndA, sizeof(int)*nnzA, cudaMemcpyHostToDevice);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMemcpy(d_csrValA, csrValA, sizeof(float)*nnzA, hipMemcpyHostToDevice);
|
||||
cudaStat1 = cudaMemcpy(d_csrValA, csrValA, sizeof(float)*nnzA, cudaMemcpyHostToDevice);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: cudaStat1 = hipMemcpy(d_B, B, sizeof(float)*n*nrhs, hipMemcpyHostToDevice);
|
||||
cudaStat1 = cudaMemcpy(d_B, B, sizeof(float)*n*nrhs, cudaMemcpyHostToDevice);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
|
||||
/* step 3: query workspace */
|
||||
// NOTE: CUDA 10.0
|
||||
// TODO: status = hipsparseScsrsm2_bufferSizeExt(
|
||||
// CHECK: HIPSPARSE_OPERATION_NON_TRANSPOSE,
|
||||
// CHECK: HIPSPARSE_OPERATION_NON_TRANSPOSE,
|
||||
status = cusparseScsrsm2_bufferSizeExt(
|
||||
handle,
|
||||
algo,
|
||||
CUSPARSE_OPERATION_NON_TRANSPOSE, /* transA */
|
||||
CUSPARSE_OPERATION_NON_TRANSPOSE, /* transB */
|
||||
n,
|
||||
nrhs,
|
||||
nnzA,
|
||||
&h_one,
|
||||
descrA,
|
||||
d_csrValA,
|
||||
d_csrRowPtrA,
|
||||
d_csrColIndA,
|
||||
d_B,
|
||||
n, /* ldb */
|
||||
info,
|
||||
policy,
|
||||
&lworkInBytes);
|
||||
// CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status);
|
||||
assert(CUSPARSE_STATUS_SUCCESS == status);
|
||||
|
||||
printf("lworkInBytes = %lld \n", (long long)lworkInBytes);
|
||||
// CHECK: if (NULL != d_work) { hipFree(d_work); }
|
||||
if (NULL != d_work) { cudaFree(d_work); }
|
||||
// CHECK: cudaStat1 = hipMalloc((void**)&d_work, lworkInBytes);
|
||||
cudaStat1 = cudaMalloc((void**)&d_work, lworkInBytes);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
|
||||
/* step 4: analysis */
|
||||
// NOTE: CUDA 10.0
|
||||
// TODO: status = hipsparseScsrsm2_analysis(
|
||||
// CHECK: HIPSPARSE_OPERATION_NON_TRANSPOSE,
|
||||
// CHECK: HIPSPARSE_OPERATION_NON_TRANSPOSE,
|
||||
status = cusparseScsrsm2_analysis(
|
||||
handle,
|
||||
algo,
|
||||
CUSPARSE_OPERATION_NON_TRANSPOSE, /* transA */
|
||||
CUSPARSE_OPERATION_NON_TRANSPOSE, /* transB */
|
||||
n,
|
||||
nrhs,
|
||||
nnzA,
|
||||
&h_one,
|
||||
descrA,
|
||||
d_csrValA,
|
||||
d_csrRowPtrA,
|
||||
d_csrColIndA,
|
||||
d_B,
|
||||
n, /* ldb */
|
||||
info,
|
||||
policy,
|
||||
d_work);
|
||||
// CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status);
|
||||
assert(CUSPARSE_STATUS_SUCCESS == status);
|
||||
/* step 5: solve L * X = B */
|
||||
// NOTE: CUDA 10.0
|
||||
// TODO: status = hipsparseScsrsm2_solve(
|
||||
// CHECK: HIPSPARSE_OPERATION_NON_TRANSPOSE,
|
||||
// CHECK: HIPSPARSE_OPERATION_NON_TRANSPOSE,
|
||||
status = cusparseScsrsm2_solve(
|
||||
handle,
|
||||
algo,
|
||||
CUSPARSE_OPERATION_NON_TRANSPOSE, /* transA */
|
||||
CUSPARSE_OPERATION_NON_TRANSPOSE, /* transB */
|
||||
n,
|
||||
nrhs,
|
||||
nnzA,
|
||||
&h_one,
|
||||
descrA,
|
||||
d_csrValA,
|
||||
d_csrRowPtrA,
|
||||
d_csrColIndA,
|
||||
d_B,
|
||||
n, /* ldb */
|
||||
info,
|
||||
policy,
|
||||
d_work);
|
||||
// CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status);
|
||||
assert(CUSPARSE_STATUS_SUCCESS == status);
|
||||
// CHECK: cudaStat1 = hipDeviceSynchronize();
|
||||
cudaStat1 = cudaDeviceSynchronize();
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
|
||||
/* step 6:measure residual B - A*X */
|
||||
// CHECK: cudaStat1 = hipMemcpy(X, d_B, sizeof(float)*n*nrhs, hipMemcpyDeviceToHost);
|
||||
cudaStat1 = cudaMemcpy(X, d_B, sizeof(float)*n*nrhs, cudaMemcpyDeviceToHost);
|
||||
// CHECK: assert(hipSuccess == cudaStat1);
|
||||
assert(cudaSuccess == cudaStat1);
|
||||
// CHECK: hipDeviceSynchronize();
|
||||
cudaDeviceSynchronize();
|
||||
|
||||
printf("==== x1 = inv(A)*b1 \n");
|
||||
for (int j = 0; j < n; j++) {
|
||||
printf("x1[%d] = %f\n", j, X[j]);
|
||||
}
|
||||
float r1_nrminf;
|
||||
residaul_eval(
|
||||
n,
|
||||
descrA,
|
||||
csrValA,
|
||||
csrRowPtrA,
|
||||
csrColIndA,
|
||||
B,
|
||||
X,
|
||||
&r1_nrminf
|
||||
);
|
||||
printf("|b1 - A*x1| = %E\n", r1_nrminf);
|
||||
|
||||
printf("==== x2 = inv(A)*b2 \n");
|
||||
for (int j = 0; j < n; j++) {
|
||||
printf("x2[%d] = %f\n", j, X[n + j]);
|
||||
}
|
||||
float r2_nrminf;
|
||||
residaul_eval(
|
||||
n,
|
||||
descrA,
|
||||
csrValA,
|
||||
csrRowPtrA,
|
||||
csrColIndA,
|
||||
B + n,
|
||||
X + n,
|
||||
&r2_nrminf
|
||||
);
|
||||
printf("|b2 - A*x2| = %E\n", r2_nrminf);
|
||||
|
||||
/* free resources */
|
||||
// CHECK: if (d_csrRowPtrA) hipFree(d_csrRowPtrA);
|
||||
if (d_csrRowPtrA) cudaFree(d_csrRowPtrA);
|
||||
// CHECK: if (d_csrColIndA) hipFree(d_csrColIndA);
|
||||
if (d_csrColIndA) cudaFree(d_csrColIndA);
|
||||
// CHECK: if (d_csrValA) hipFree(d_csrValA);
|
||||
if (d_csrValA) cudaFree(d_csrValA);
|
||||
// CHECK: if (d_B) hipFree(d_B);
|
||||
if (d_B) cudaFree(d_B);
|
||||
// CHECK: if (handle) hipsparseDestroy(handle);
|
||||
if (handle) cusparseDestroy(handle);
|
||||
// CHECK: if (stream) hipStreamDestroy(stream);
|
||||
if (stream) cudaStreamDestroy(stream);
|
||||
// CHECK: if (descrA) hipsparseDestroyMatDescr(descrA);
|
||||
if (descrA) cusparseDestroyMatDescr(descrA);
|
||||
// NOTE: CUDA 10.0
|
||||
// TODO: if (info) hipsparseDestroyCsrsm2Info(info);
|
||||
if (info) cusparseDestroyCsrsm2Info(info);
|
||||
// CHECK: hipDeviceReset();
|
||||
cudaDeviceReset();
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -26,6 +26,9 @@ config.excludes = ['cmdparser.hpp']
|
||||
config.cuda_version = "@CUDA_VERSION@"
|
||||
if config.cuda_version not in ['10.0']:
|
||||
config.excludes.append('cuSPARSE_08.cu')
|
||||
config.excludes.append('cuSPARSE_09.cu')
|
||||
config.excludes.append('cuSPARSE_10.cu')
|
||||
config.excludes.append('cuSPARSE_11.cu')
|
||||
|
||||
# test_exec_root: The path where tests are located (default is the test suite root).
|
||||
#config.test_exec_root = config.test_source_root
|
||||
|
||||
Yeni konuda referans
Bir kullanıcı engelle