[HIPIFY][BLAS][tests] Add tests on hipifying to 'roc'

This commit is contained in:
Evgeny Mankov
2019-02-07 19:25:23 +03:00
bovenliggende dd5928318f
commit 72c41d4ebf
6 gewijzigde bestanden met toevoegingen van 288 en 5 verwijderingen
@@ -8,12 +8,13 @@ set HIPIFY=%1
set IN_FILE=%2
set TMP_FILE=%3
set CUDA_ROOT=%4
set ROC=%5
set all_args=%*
call set clang_args=%%all_args:*%5=%%
set clang_args=%5%clang_args%
call set clang_args=%%all_args:*%6=%%
set clang_args=%6%clang_args%
%HIPIFY% -o=%TMP_FILE% %IN_FILE% %CUDA_ROOT% -- %clang_args%
%HIPIFY% -o=%TMP_FILE% %IN_FILE% %CUDA_ROOT% %ROC% -- %clang_args%
if errorlevel 1 (echo Error: hipify-clang.exe failed with exit code: %errorlevel% && exit /b %errorlevel%)
findstr /v /r /c:"[ ]*//[ ]*[CHECK*|RUN]" %TMP_FILE% | %FILE_CHECK% %IN_FILE%
@@ -10,8 +10,9 @@ HIPIFY=$1
IN_FILE=$2
TMP_FILE=$3
CUDA_ROOT=$4
shift 4
ROC=$5
shift 5
# Remaining args are the ones to forward to clang proper.
$HIPIFY -o=$TMP_FILE $IN_FILE $CUDA_ROOT -- $@ && cat $TMP_FILE | sed -Ee 's|//.+|// |g' | FileCheck $IN_FILE
$HIPIFY -o=$TMP_FILE $IN_FILE $CUDA_ROOT $ROC -- $@ && cat $TMP_FILE | sed -Ee 's|//.+|// |g' | FileCheck $IN_FILE
@@ -1,5 +1,6 @@
// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args
// CHECK: #include <hip/hip_runtime.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
@@ -33,6 +34,7 @@ int main(void) {
}
// cublasInit is not supported yet
cublasInit();
// cublasAlloc is not supported yet
stat = cublasAlloc(M*N, sizeof(*a), (void**)&devPtrA);
// CHECK: if (stat != HIPBLAS_STATUS_SUCCESS) {
if (stat != CUBLAS_STATUS_SUCCESS) {
@@ -0,0 +1,81 @@
// RUN: %run_test hipify "%s" "%t" %hipify_args "-roc" %clang_args
// CHECK: #include <hip/hip_runtime.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
// CHECK: #include "rocblas.h"
#include "cublas.h"
#define M 6
#define N 5
#define IDX2C(i,j,ld) (((j)*(ld))+(i))
static __inline__ void modify(float *m, int ldm, int n, int p, int q, float
alpha, float beta) {
// CHECK: rocblas_sscal(n - p, alpha, &m[IDX2C(p, q, ldm)], ldm);
// CHECK: rocblas_sscal(ldm - p, beta, &m[IDX2C(p, q, ldm)], 1);
cublasSscal(n - p, alpha, &m[IDX2C(p, q, ldm)], ldm);
cublasSscal(ldm - p, beta, &m[IDX2C(p, q, ldm)], 1);
}
int main(void) {
int i, j;
// CHECK: rocblas_status stat;
cublasStatus stat;
float* devPtrA;
float* a = 0;
a = (float *)malloc(M * N * sizeof(*a));
if (!a) {
printf("host memory allocation failed");
return EXIT_FAILURE;
}
for (j = 0; j < N; j++) {
for (i = 0; i < M; i++) {
a[IDX2C(i, j, M)] = (float)(i * M + j + 1);
}
}
// cublasInit is not supported yet
cublasInit();
// cublasAlloc is not supported yet
stat = cublasAlloc(M*N, sizeof(*a), (void**)&devPtrA);
// CHECK: if (stat != rocblas_status_success) {
if (stat != CUBLAS_STATUS_SUCCESS) {
printf("device memory allocation failed");
// cublasShutdown is not supported yet
cublasShutdown();
return EXIT_FAILURE;
}
// CHECK: stat = rocblas_set_matrix(M, N, sizeof(*a), a, M, devPtrA, M);
stat = cublasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M);
// CHECK: if (stat != rocblas_status_success) {
if (stat != CUBLAS_STATUS_SUCCESS) {
printf("data download failed");
// cublasFree is not supported yet
cublasFree(devPtrA);
// cublasShutdown is not supported yet
cublasShutdown();
return EXIT_FAILURE;
}
modify(devPtrA, M, N, 1, 2, 16.0f, 12.0f);
// CHECK: stat = rocblas_get_matrix(M, N, sizeof(*a), devPtrA, M, a, M);
stat = cublasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M);
// CHECK: if (stat != rocblas_status_success) {
if (stat != CUBLAS_STATUS_SUCCESS) {
printf("data upload failed");
// cublasFree is not supported yet
cublasFree(devPtrA);
// cublasShutdown is not supported yet
cublasShutdown();
return EXIT_FAILURE;
}
// cublasFree is not supported yet
cublasFree(devPtrA);
// cublasShutdown is not supported yet
cublasShutdown();
for (j = 0; j < N; j++) {
for (i = 0; i < M; i++) {
printf("%7.0f", a[IDX2C(i, j, M)]);
}
printf("\n");
}
free(a);
return EXIT_SUCCESS;
}
@@ -0,0 +1,90 @@
// RUN: %run_test hipify "%s" "%t" %hipify_args "-roc" %clang_args
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
// CHECK: #include <hip/hip_runtime.h>
#include <cuda_runtime.h>
// CHECK: #include "rocblas.h"
#include "cublas_v2.h"
#define M 6
#define N 5
#define IDX2F(i,j,ld) ((((j)-1)*(ld))+((i)-1))
// CHECK: static __inline__ void modify(rocblas_handle handle, float *m, int ldm, int
static __inline__ void modify(cublasHandle_t handle, float *m, int ldm, int
n, int p, int q, float alpha, float beta) {
// CHECK: rocblas_sscal(handle, n - p + 1, &alpha, &m[IDX2F(p, q, ldm)], ldm);
// CHECK: rocblas_sscal(handle, ldm - p + 1, &beta, &m[IDX2F(p, q, ldm)], 1);
cublasSscal(handle, n - p + 1, &alpha, &m[IDX2F(p, q, ldm)], ldm);
cublasSscal(handle, ldm - p + 1, &beta, &m[IDX2F(p, q, ldm)], 1);
}
int main(void) {
// CHECK: hipError_t cudaStat;
// CHECK: rocblas_status stat;
// CHECK: rocblas_handle handle;
cudaError_t cudaStat;
cublasStatus_t stat;
cublasHandle_t handle;
int i, j;
float* devPtrA;
float* a = 0;
a = (float *)malloc(M * N * sizeof(*a));
if (!a) {
printf("host memory allocation failed");
return EXIT_FAILURE;
}
for (j = 1; j <= N; j++) {
for (i = 1; i <= M; i++) {
a[IDX2F(i, j, M)] = (float)((i - 1) * M + j);
}
}
// CHECK: cudaStat = hipMalloc((void**)&devPtrA, M*N * sizeof(*a));
cudaStat = cudaMalloc((void**)&devPtrA, M*N * sizeof(*a));
// CHECK: if (cudaStat != hipSuccess) {
if (cudaStat != cudaSuccess) {
printf("device memory allocation failed");
return EXIT_FAILURE;
}
// CHECK: stat = rocblas_create_handle(&handle);
stat = cublasCreate(&handle);
// CHECK: if (stat != rocblas_status_success) {
if (stat != CUBLAS_STATUS_SUCCESS) {
printf("CUBLAS initialization failed\n");
return EXIT_FAILURE;
}
// CHECK: stat = rocblas_set_matrix(M, N, sizeof(*a), a, M, devPtrA, M);
stat = cublasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M);
// CHECK: if (stat != rocblas_status_success) {
if (stat != CUBLAS_STATUS_SUCCESS) {
printf("data download failed");
// CHECK: hipFree(devPtrA);
// CHECK: rocblas_destroy_handle(handle);
cudaFree(devPtrA);
cublasDestroy(handle);
return EXIT_FAILURE;
}
modify(handle, devPtrA, M, N, 2, 3, 16.0f, 12.0f);
// CHECK: stat = rocblas_get_matrix(M, N, sizeof(*a), devPtrA, M, a, M);
stat = cublasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M);
// CHECK: if (stat != rocblas_status_success) {
if (stat != CUBLAS_STATUS_SUCCESS) {
printf("data upload failed");
// CHECK: hipFree(devPtrA);
// CHECK: rocblas_destroy_handle(handle);
cudaFree(devPtrA);
cublasDestroy(handle);
return EXIT_FAILURE;
}
// CHECK: hipFree(devPtrA);
// CHECK: rocblas_destroy_handle(handle);
cudaFree(devPtrA);
cublasDestroy(handle);
for (j = 1; j <= N; j++) {
for (i = 1; i <= M; i++) {
printf("%7.0f", a[IDX2F(i, j, M)]);
}
printf("\n");
}
free(a);
return EXIT_SUCCESS;
}
@@ -0,0 +1,108 @@
// RUN: %run_test hipify "%s" "%t" %hipify_args "-roc" %clang_args
#include <stdio.h>
#include <stdlib.h>
// CHECK: #include <hip/hip_runtime.h>
#include <cuda_runtime.h>
// CHECK: #include "rocblas.h"
#include "cublas_v2.h"
#define IDX2C(i,j,ld) (((j)*(ld))+(i))
#define m 6
#define n 4
#define k 5
int main(void) {
// CHECK: hipError_t cudaStat;
// CHECK: rocblas_status stat;
// CHECK: rocblas_handle handle;
cudaError_t cudaStat;
cublasStatus_t stat;
cublasHandle_t handle;
int i, j;
float * a;
float * b;
float * c;
a = (float *)malloc(m*k * sizeof(float));
b = (float *)malloc(k*n * sizeof(float));
c = (float *)malloc(m*n * sizeof(float));
int ind = 11;
for (j = 0; j<k; j++) {
for (i = 0; i<m; i++) {
a[IDX2C(i, j, m)] = (float)ind++;
}
}
printf("a:\n");
for (i = 0; i<m; i++) {
for (j = 0; j<k; j++) {
printf(" %5.0f", a[IDX2C(i, j, m)]);
}
printf("\n");
}
ind = 11;
for (j = 0; j<n; j++) {
for (i = 0; i<k; i++) {
b[IDX2C(i, j, k)] = (float)ind++;
}
}
printf("b:\n");
for (i = 0; i<k; i++) {
for (j = 0; j<n; j++) {
printf(" %5.0f", b[IDX2C(i, j, k)]);
}
printf("\n");
}
ind = 11;
for (j = 0; j<n; j++) {
for (i = 0; i<m; i++) {
c[IDX2C(i, j, m)] = (float)ind++;
}
}
printf("c:\n");
for (i = 0; i<m; i++) {
for (j = 0; j<n; j++) {
printf(" %5.0f", c[IDX2C(i, j, m)]);
}
printf("\n");
}
float * d_a;
float * d_b;
float * d_c;
// CHECK: cudaStat = hipMalloc((void **)& d_a, m*k * sizeof(*a));
// CHECK: cudaStat = hipMalloc((void **)& d_b, k*n * sizeof(*b));
// CHECK: cudaStat = hipMalloc((void **)& d_c, m*n * sizeof(*c));
cudaStat = cudaMalloc((void **)& d_a, m*k * sizeof(*a));
cudaStat = cudaMalloc((void **)& d_b, k*n * sizeof(*b));
cudaStat = cudaMalloc((void **)& d_c, m*n * sizeof(*c));
// CHECK: stat = rocblas_create_handle(&handle);
stat = cublasCreate(&handle);
// CHECK: stat = rocblas_set_matrix(m, k, sizeof(*a), a, m, d_a, m);
// CHECK: stat = rocblas_set_matrix(k, n, sizeof(*b), b, k, d_b, k);
// CHECK: stat = rocblas_set_matrix(m, n, sizeof(*c), c, m, d_c, m);
stat = cublasSetMatrix(m, k, sizeof(*a), a, m, d_a, m);
stat = cublasSetMatrix(k, n, sizeof(*b), b, k, d_b, k);
stat = cublasSetMatrix(m, n, sizeof(*c), c, m, d_c, m);
float al = 1.0f;
float bet = 1.0f;
// CHECK: stat = rocblas_sgemm(handle, rocblas_operation_none, rocblas_operation_none, m, n, k, &al, d_a, m, d_b, k, &bet, d_c, m);
stat = cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &al, d_a, m, d_b, k, &bet, d_c, m);
// CHECK: stat = rocblas_get_matrix(m, n, sizeof(*c), d_c, m, c, m);
stat = cublasGetMatrix(m, n, sizeof(*c), d_c, m, c, m);
printf("c after Sgemm :\n");
for (i = 0; i<m; i++) {
for (j = 0; j<n; j++) {
printf(" %7.0f", c[IDX2C(i, j, m)]);
}
printf("\n");
}
// CHECK: hipFree(d_a);
// CHECK: hipFree(d_b);
// CHECK: hipFree(d_c);
// CHECK: rocblas_destroy_handle(handle);
cudaFree(d_a);
cudaFree(d_b);
cudaFree(d_c);
cublasDestroy(handle);
free(a);
free(b);
free(c);
return EXIT_SUCCESS;
}