[HIPIFY][BLAS][tests] Add tests on hipifying to 'roc'
[ROCm/hip commit: 385fe36f2a]
Этот коммит содержится в:
@@ -8,12 +8,13 @@ set HIPIFY=%1
|
||||
set IN_FILE=%2
|
||||
set TMP_FILE=%3
|
||||
set CUDA_ROOT=%4
|
||||
set ROC=%5
|
||||
|
||||
set all_args=%*
|
||||
call set clang_args=%%all_args:*%5=%%
|
||||
set clang_args=%5%clang_args%
|
||||
call set clang_args=%%all_args:*%6=%%
|
||||
set clang_args=%6%clang_args%
|
||||
|
||||
%HIPIFY% -o=%TMP_FILE% %IN_FILE% %CUDA_ROOT% -- %clang_args%
|
||||
%HIPIFY% -o=%TMP_FILE% %IN_FILE% %CUDA_ROOT% %ROC% -- %clang_args%
|
||||
if errorlevel 1 (echo Error: hipify-clang.exe failed with exit code: %errorlevel% && exit /b %errorlevel%)
|
||||
|
||||
findstr /v /r /c:"[ ]*//[ ]*[CHECK*|RUN]" %TMP_FILE% | %FILE_CHECK% %IN_FILE%
|
||||
|
||||
@@ -10,8 +10,9 @@ HIPIFY=$1
|
||||
IN_FILE=$2
|
||||
TMP_FILE=$3
|
||||
CUDA_ROOT=$4
|
||||
shift 4
|
||||
ROC=$5
|
||||
shift 5
|
||||
|
||||
# Remaining args are the ones to forward to clang proper.
|
||||
|
||||
$HIPIFY -o=$TMP_FILE $IN_FILE $CUDA_ROOT -- $@ && cat $TMP_FILE | sed -Ee 's|//.+|// |g' | FileCheck $IN_FILE
|
||||
$HIPIFY -o=$TMP_FILE $IN_FILE $CUDA_ROOT $ROC -- $@ && cat $TMP_FILE | sed -Ee 's|//.+|// |g' | FileCheck $IN_FILE
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args
|
||||
|
||||
// CHECK: #include <hip/hip_runtime.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
@@ -33,6 +34,7 @@ int main(void) {
|
||||
}
|
||||
// cublasInit is not supported yet
|
||||
cublasInit();
|
||||
// cublasAlloc is not supported yet
|
||||
stat = cublasAlloc(M*N, sizeof(*a), (void**)&devPtrA);
|
||||
// CHECK: if (stat != HIPBLAS_STATUS_SUCCESS) {
|
||||
if (stat != CUBLAS_STATUS_SUCCESS) {
|
||||
|
||||
+81
@@ -0,0 +1,81 @@
|
||||
// RUN: %run_test hipify "%s" "%t" %hipify_args "-roc" %clang_args
|
||||
|
||||
// CHECK: #include <hip/hip_runtime.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
// CHECK: #include "rocblas.h"
|
||||
#include "cublas.h"
|
||||
#define M 6
|
||||
#define N 5
|
||||
#define IDX2C(i,j,ld) (((j)*(ld))+(i))
|
||||
static __inline__ void modify(float *m, int ldm, int n, int p, int q, float
|
||||
alpha, float beta) {
|
||||
// CHECK: rocblas_sscal(n - p, alpha, &m[IDX2C(p, q, ldm)], ldm);
|
||||
// CHECK: rocblas_sscal(ldm - p, beta, &m[IDX2C(p, q, ldm)], 1);
|
||||
cublasSscal(n - p, alpha, &m[IDX2C(p, q, ldm)], ldm);
|
||||
cublasSscal(ldm - p, beta, &m[IDX2C(p, q, ldm)], 1);
|
||||
}
|
||||
int main(void) {
|
||||
int i, j;
|
||||
// CHECK: rocblas_status stat;
|
||||
cublasStatus stat;
|
||||
float* devPtrA;
|
||||
float* a = 0;
|
||||
a = (float *)malloc(M * N * sizeof(*a));
|
||||
if (!a) {
|
||||
printf("host memory allocation failed");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
for (j = 0; j < N; j++) {
|
||||
for (i = 0; i < M; i++) {
|
||||
a[IDX2C(i, j, M)] = (float)(i * M + j + 1);
|
||||
}
|
||||
}
|
||||
// cublasInit is not supported yet
|
||||
cublasInit();
|
||||
// cublasAlloc is not supported yet
|
||||
stat = cublasAlloc(M*N, sizeof(*a), (void**)&devPtrA);
|
||||
// CHECK: if (stat != rocblas_status_success) {
|
||||
if (stat != CUBLAS_STATUS_SUCCESS) {
|
||||
printf("device memory allocation failed");
|
||||
// cublasShutdown is not supported yet
|
||||
cublasShutdown();
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
// CHECK: stat = rocblas_set_matrix(M, N, sizeof(*a), a, M, devPtrA, M);
|
||||
stat = cublasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M);
|
||||
// CHECK: if (stat != rocblas_status_success) {
|
||||
if (stat != CUBLAS_STATUS_SUCCESS) {
|
||||
printf("data download failed");
|
||||
// cublasFree is not supported yet
|
||||
cublasFree(devPtrA);
|
||||
// cublasShutdown is not supported yet
|
||||
cublasShutdown();
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
modify(devPtrA, M, N, 1, 2, 16.0f, 12.0f);
|
||||
// CHECK: stat = rocblas_get_matrix(M, N, sizeof(*a), devPtrA, M, a, M);
|
||||
stat = cublasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M);
|
||||
// CHECK: if (stat != rocblas_status_success) {
|
||||
if (stat != CUBLAS_STATUS_SUCCESS) {
|
||||
printf("data upload failed");
|
||||
// cublasFree is not supported yet
|
||||
cublasFree(devPtrA);
|
||||
// cublasShutdown is not supported yet
|
||||
cublasShutdown();
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
// cublasFree is not supported yet
|
||||
cublasFree(devPtrA);
|
||||
// cublasShutdown is not supported yet
|
||||
cublasShutdown();
|
||||
for (j = 0; j < N; j++) {
|
||||
for (i = 0; i < M; i++) {
|
||||
printf("%7.0f", a[IDX2C(i, j, M)]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
free(a);
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
+90
@@ -0,0 +1,90 @@
|
||||
// RUN: %run_test hipify "%s" "%t" %hipify_args "-roc" %clang_args
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
// CHECK: #include <hip/hip_runtime.h>
|
||||
#include <cuda_runtime.h>
|
||||
// CHECK: #include "rocblas.h"
|
||||
#include "cublas_v2.h"
|
||||
#define M 6
|
||||
#define N 5
|
||||
#define IDX2F(i,j,ld) ((((j)-1)*(ld))+((i)-1))
|
||||
// CHECK: static __inline__ void modify(rocblas_handle handle, float *m, int ldm, int
|
||||
static __inline__ void modify(cublasHandle_t handle, float *m, int ldm, int
|
||||
n, int p, int q, float alpha, float beta) {
|
||||
// CHECK: rocblas_sscal(handle, n - p + 1, &alpha, &m[IDX2F(p, q, ldm)], ldm);
|
||||
// CHECK: rocblas_sscal(handle, ldm - p + 1, &beta, &m[IDX2F(p, q, ldm)], 1);
|
||||
cublasSscal(handle, n - p + 1, &alpha, &m[IDX2F(p, q, ldm)], ldm);
|
||||
cublasSscal(handle, ldm - p + 1, &beta, &m[IDX2F(p, q, ldm)], 1);
|
||||
}
|
||||
int main(void) {
|
||||
// CHECK: hipError_t cudaStat;
|
||||
// CHECK: rocblas_status stat;
|
||||
// CHECK: rocblas_handle handle;
|
||||
cudaError_t cudaStat;
|
||||
cublasStatus_t stat;
|
||||
cublasHandle_t handle;
|
||||
int i, j;
|
||||
float* devPtrA;
|
||||
float* a = 0;
|
||||
a = (float *)malloc(M * N * sizeof(*a));
|
||||
if (!a) {
|
||||
printf("host memory allocation failed");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
for (j = 1; j <= N; j++) {
|
||||
for (i = 1; i <= M; i++) {
|
||||
a[IDX2F(i, j, M)] = (float)((i - 1) * M + j);
|
||||
}
|
||||
}
|
||||
// CHECK: cudaStat = hipMalloc((void**)&devPtrA, M*N * sizeof(*a));
|
||||
cudaStat = cudaMalloc((void**)&devPtrA, M*N * sizeof(*a));
|
||||
// CHECK: if (cudaStat != hipSuccess) {
|
||||
if (cudaStat != cudaSuccess) {
|
||||
printf("device memory allocation failed");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
// CHECK: stat = rocblas_create_handle(&handle);
|
||||
stat = cublasCreate(&handle);
|
||||
// CHECK: if (stat != rocblas_status_success) {
|
||||
if (stat != CUBLAS_STATUS_SUCCESS) {
|
||||
printf("CUBLAS initialization failed\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
// CHECK: stat = rocblas_set_matrix(M, N, sizeof(*a), a, M, devPtrA, M);
|
||||
stat = cublasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M);
|
||||
// CHECK: if (stat != rocblas_status_success) {
|
||||
if (stat != CUBLAS_STATUS_SUCCESS) {
|
||||
printf("data download failed");
|
||||
// CHECK: hipFree(devPtrA);
|
||||
// CHECK: rocblas_destroy_handle(handle);
|
||||
cudaFree(devPtrA);
|
||||
cublasDestroy(handle);
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
modify(handle, devPtrA, M, N, 2, 3, 16.0f, 12.0f);
|
||||
// CHECK: stat = rocblas_get_matrix(M, N, sizeof(*a), devPtrA, M, a, M);
|
||||
stat = cublasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M);
|
||||
// CHECK: if (stat != rocblas_status_success) {
|
||||
if (stat != CUBLAS_STATUS_SUCCESS) {
|
||||
printf("data upload failed");
|
||||
// CHECK: hipFree(devPtrA);
|
||||
// CHECK: rocblas_destroy_handle(handle);
|
||||
cudaFree(devPtrA);
|
||||
cublasDestroy(handle);
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
// CHECK: hipFree(devPtrA);
|
||||
// CHECK: rocblas_destroy_handle(handle);
|
||||
cudaFree(devPtrA);
|
||||
cublasDestroy(handle);
|
||||
for (j = 1; j <= N; j++) {
|
||||
for (i = 1; i <= M; i++) {
|
||||
printf("%7.0f", a[IDX2F(i, j, M)]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
free(a);
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
+108
@@ -0,0 +1,108 @@
|
||||
// RUN: %run_test hipify "%s" "%t" %hipify_args "-roc" %clang_args
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
// CHECK: #include <hip/hip_runtime.h>
|
||||
#include <cuda_runtime.h>
|
||||
// CHECK: #include "rocblas.h"
|
||||
#include "cublas_v2.h"
|
||||
#define IDX2C(i,j,ld) (((j)*(ld))+(i))
|
||||
#define m 6
|
||||
#define n 4
|
||||
#define k 5
|
||||
int main(void) {
|
||||
// CHECK: hipError_t cudaStat;
|
||||
// CHECK: rocblas_status stat;
|
||||
// CHECK: rocblas_handle handle;
|
||||
cudaError_t cudaStat;
|
||||
cublasStatus_t stat;
|
||||
cublasHandle_t handle;
|
||||
int i, j;
|
||||
float * a;
|
||||
float * b;
|
||||
float * c;
|
||||
a = (float *)malloc(m*k * sizeof(float));
|
||||
b = (float *)malloc(k*n * sizeof(float));
|
||||
c = (float *)malloc(m*n * sizeof(float));
|
||||
int ind = 11;
|
||||
for (j = 0; j<k; j++) {
|
||||
for (i = 0; i<m; i++) {
|
||||
a[IDX2C(i, j, m)] = (float)ind++;
|
||||
}
|
||||
}
|
||||
printf("a:\n");
|
||||
for (i = 0; i<m; i++) {
|
||||
for (j = 0; j<k; j++) {
|
||||
printf(" %5.0f", a[IDX2C(i, j, m)]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
ind = 11;
|
||||
for (j = 0; j<n; j++) {
|
||||
for (i = 0; i<k; i++) {
|
||||
b[IDX2C(i, j, k)] = (float)ind++;
|
||||
}
|
||||
}
|
||||
printf("b:\n");
|
||||
for (i = 0; i<k; i++) {
|
||||
for (j = 0; j<n; j++) {
|
||||
printf(" %5.0f", b[IDX2C(i, j, k)]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
ind = 11;
|
||||
for (j = 0; j<n; j++) {
|
||||
for (i = 0; i<m; i++) {
|
||||
c[IDX2C(i, j, m)] = (float)ind++;
|
||||
}
|
||||
}
|
||||
printf("c:\n");
|
||||
for (i = 0; i<m; i++) {
|
||||
for (j = 0; j<n; j++) {
|
||||
printf(" %5.0f", c[IDX2C(i, j, m)]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
float * d_a;
|
||||
float * d_b;
|
||||
float * d_c;
|
||||
// CHECK: cudaStat = hipMalloc((void **)& d_a, m*k * sizeof(*a));
|
||||
// CHECK: cudaStat = hipMalloc((void **)& d_b, k*n * sizeof(*b));
|
||||
// CHECK: cudaStat = hipMalloc((void **)& d_c, m*n * sizeof(*c));
|
||||
cudaStat = cudaMalloc((void **)& d_a, m*k * sizeof(*a));
|
||||
cudaStat = cudaMalloc((void **)& d_b, k*n * sizeof(*b));
|
||||
cudaStat = cudaMalloc((void **)& d_c, m*n * sizeof(*c));
|
||||
// CHECK: stat = rocblas_create_handle(&handle);
|
||||
stat = cublasCreate(&handle);
|
||||
// CHECK: stat = rocblas_set_matrix(m, k, sizeof(*a), a, m, d_a, m);
|
||||
// CHECK: stat = rocblas_set_matrix(k, n, sizeof(*b), b, k, d_b, k);
|
||||
// CHECK: stat = rocblas_set_matrix(m, n, sizeof(*c), c, m, d_c, m);
|
||||
stat = cublasSetMatrix(m, k, sizeof(*a), a, m, d_a, m);
|
||||
stat = cublasSetMatrix(k, n, sizeof(*b), b, k, d_b, k);
|
||||
stat = cublasSetMatrix(m, n, sizeof(*c), c, m, d_c, m);
|
||||
float al = 1.0f;
|
||||
float bet = 1.0f;
|
||||
// CHECK: stat = rocblas_sgemm(handle, rocblas_operation_none, rocblas_operation_none, m, n, k, &al, d_a, m, d_b, k, &bet, d_c, m);
|
||||
stat = cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &al, d_a, m, d_b, k, &bet, d_c, m);
|
||||
// CHECK: stat = rocblas_get_matrix(m, n, sizeof(*c), d_c, m, c, m);
|
||||
stat = cublasGetMatrix(m, n, sizeof(*c), d_c, m, c, m);
|
||||
printf("c after Sgemm :\n");
|
||||
for (i = 0; i<m; i++) {
|
||||
for (j = 0; j<n; j++) {
|
||||
printf(" %7.0f", c[IDX2C(i, j, m)]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
// CHECK: hipFree(d_a);
|
||||
// CHECK: hipFree(d_b);
|
||||
// CHECK: hipFree(d_c);
|
||||
// CHECK: rocblas_destroy_handle(handle);
|
||||
cudaFree(d_a);
|
||||
cudaFree(d_b);
|
||||
cudaFree(d_c);
|
||||
cublasDestroy(handle);
|
||||
free(a);
|
||||
free(b);
|
||||
free(c);
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
Ссылка в новой задаче
Block a user