SWDEV-402381 - Add hipCheckErrors for HIP API calls in samples (#375)
Change-Id: I335d7e780362fc59fd2d90939b4c8b8a7231ffc7
This commit is contained in:
committed by
GitHub
parent
b8fb6f88b9
commit
7cc53f992f
@@ -48,5 +48,7 @@ set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE})
|
||||
# Create the excutable
|
||||
add_executable(bit_extract bit_extract.cpp)
|
||||
|
||||
target_include_directories(bit_extract PRIVATE ../../common)
|
||||
|
||||
# Link with HIP
|
||||
target_link_libraries(bit_extract hip::host)
|
||||
|
||||
@@ -29,6 +29,7 @@ ifeq (,$(HIP_PATH))
|
||||
endif
|
||||
HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --platform)
|
||||
HIPCC=$(HIP_PATH)/bin/hipcc
|
||||
INCLUDES := -I../../common
|
||||
|
||||
# Show how to use PLATFORM to specify different options for each compiler:
|
||||
ifeq (${HIP_PLATFORM}, nvcc)
|
||||
@@ -38,7 +39,7 @@ endif
|
||||
EXE=bit_extract
|
||||
|
||||
$(EXE): bit_extract.cpp
|
||||
$(HIPCC) $(HIPCC_FLAGS) $< -o $@
|
||||
$(HIPCC) $(HIPCC_FLAGS) $(INCLUDES) $< -o $@
|
||||
|
||||
all: $(EXE)
|
||||
|
||||
|
||||
@@ -23,16 +23,7 @@ THE SOFTWARE.
|
||||
#include <stdio.h>
|
||||
#include <iostream>
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
#define CHECK(cmd) \
|
||||
{ \
|
||||
hipError_t error = cmd; \
|
||||
if (error != hipSuccess) { \
|
||||
fprintf(stderr, "error: '%s'(%d) at %s:%d\n", hipGetErrorString(error), error, \
|
||||
__FILE__, __LINE__); \
|
||||
exit(EXIT_FAILURE); \
|
||||
} \
|
||||
}
|
||||
#include "hip_helper.h"
|
||||
|
||||
__global__ void bit_extract_kernel(uint32_t* C_d, const uint32_t* A_d, size_t N) {
|
||||
size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
@@ -69,28 +60,28 @@ int main(int argc, char* argv[]) {
|
||||
#endif
|
||||
|
||||
int deviceId;
|
||||
CHECK(hipGetDevice(&deviceId));
|
||||
checkHipErrors(hipGetDevice(&deviceId));
|
||||
hipDeviceProp_t props;
|
||||
CHECK(hipGetDeviceProperties(&props, deviceId));
|
||||
checkHipErrors(hipGetDeviceProperties(&props, deviceId));
|
||||
printf("info: running on device #%d %s\n", deviceId, props.name);
|
||||
|
||||
|
||||
printf("info: allocate host mem (%6.2f MB)\n", 2 * Nbytes / 1024.0 / 1024.0);
|
||||
A_h = (uint32_t*)malloc(Nbytes);
|
||||
CHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess);
|
||||
checkHipErrors(A_h == 0 ? hipErrorOutOfMemory : hipSuccess);
|
||||
C_h = (uint32_t*)malloc(Nbytes);
|
||||
CHECK(C_h == 0 ? hipErrorOutOfMemory : hipSuccess);
|
||||
checkHipErrors(C_h == 0 ? hipErrorOutOfMemory : hipSuccess);
|
||||
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
A_h[i] = i;
|
||||
}
|
||||
|
||||
printf("info: allocate device mem (%6.2f MB)\n", 2 * Nbytes / 1024.0 / 1024.0);
|
||||
CHECK(hipMalloc(&A_d, Nbytes));
|
||||
CHECK(hipMalloc(&C_d, Nbytes));
|
||||
checkHipErrors(hipMalloc(&A_d, Nbytes));
|
||||
checkHipErrors(hipMalloc(&C_d, Nbytes));
|
||||
|
||||
printf("info: copy Host2Device\n");
|
||||
CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
|
||||
checkHipErrors(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
|
||||
|
||||
printf("info: launch 'bit_extract_kernel' \n");
|
||||
const unsigned blocks = 512;
|
||||
@@ -98,7 +89,7 @@ int main(int argc, char* argv[]) {
|
||||
hipLaunchKernelGGL(bit_extract_kernel, dim3(blocks), dim3(threadsPerBlock), 0, 0, C_d, A_d, N);
|
||||
|
||||
printf("info: copy Device2Host\n");
|
||||
CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
checkHipErrors(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
printf("info: check result\n");
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
@@ -106,7 +97,7 @@ int main(int argc, char* argv[]) {
|
||||
if (C_h[i] != Agold) {
|
||||
fprintf(stderr, "mismatch detected.\n");
|
||||
printf("%zu: %08x =? %08x (Ain=%08x)\n", i, C_h[i], Agold, A_h[i]);
|
||||
CHECK(hipErrorUnknown);
|
||||
checkHipErrors(hipErrorUnknown);
|
||||
}
|
||||
}
|
||||
printf("PASSED!\n");
|
||||
|
||||
@@ -22,6 +22,8 @@ project(module_api)
|
||||
|
||||
cmake_minimum_required(VERSION 3.10)
|
||||
|
||||
include_directories(../../common)
|
||||
|
||||
if (NOT DEFINED ROCM_PATH )
|
||||
set ( ROCM_PATH "/opt/rocm" CACHE STRING "Default ROCM installation directory." )
|
||||
endif ()
|
||||
|
||||
@@ -27,20 +27,21 @@ ifeq (,$(HIP_PATH))
|
||||
endif
|
||||
HIPCC=$(HIP_PATH)/bin/hipcc
|
||||
HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --compiler)
|
||||
INCLUDES := -I../../common
|
||||
|
||||
all: vcpy_kernel.code runKernel.hip.out launchKernelHcc.hip.out defaultDriver.hip.out
|
||||
|
||||
runKernel.hip.out: runKernel.cpp
|
||||
$(HIPCC) $(HIPCC_FLAGS) $< -o $@
|
||||
$(HIPCC) $(HIPCC_FLAGS) $(INCLUDES) $< -o $@
|
||||
|
||||
launchKernelHcc.hip.out: launchKernelHcc.cpp
|
||||
$(HIPCC) $(HIPCC_FLAGS) $< -o $@
|
||||
$(HIPCC) $(HIPCC_FLAGS) $(INCLUDES) $< -o $@
|
||||
|
||||
defaultDriver.hip.out: defaultDriver.cpp
|
||||
$(HIPCC) $(HIPCC_FLAGS) $< -o $@
|
||||
$(HIPCC) $(HIPCC_FLAGS) $(INCLUDES) $< -o $@
|
||||
|
||||
vcpy_kernel.code: vcpy_kernel.cpp
|
||||
$(HIPCC) --genco $(GENCO_FLAGS) $^ -o $@
|
||||
$(HIPCC) --genco $(GENCO_FLAGS) $(INCLUDES) $^ -o $@
|
||||
|
||||
clean:
|
||||
rm -f *.code *.out
|
||||
|
||||
@@ -24,6 +24,7 @@ THE SOFTWARE.
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include "hip_helper.h"
|
||||
|
||||
#define LEN 64
|
||||
#define SIZE LEN << 2
|
||||
@@ -45,25 +46,25 @@ int main() {
|
||||
hipInit(0);
|
||||
hipDevice_t device;
|
||||
hipCtx_t context;
|
||||
hipDeviceGet(&device, 0);
|
||||
hipCtxCreate(&context, 0, device);
|
||||
checkHipErrors(hipDeviceGet(&device, 0));
|
||||
checkHipErrors(hipCtxCreate(&context, 0, device));
|
||||
|
||||
hipMalloc((void**)&Ad, SIZE);
|
||||
hipMalloc((void**)&Bd, SIZE);
|
||||
checkHipErrors(hipMalloc((void**)&Ad, SIZE));
|
||||
checkHipErrors(hipMalloc((void**)&Bd, SIZE));
|
||||
|
||||
hipMemcpyHtoD(Ad, A, SIZE);
|
||||
hipMemcpyHtoD(Bd, B, SIZE);
|
||||
checkHipErrors(hipMemcpyHtoD(Ad, A, SIZE));
|
||||
checkHipErrors(hipMemcpyHtoD(Bd, B, SIZE));
|
||||
|
||||
hipModule_t Module;
|
||||
hipFunction_t Function;
|
||||
hipModuleLoad(&Module, fileName);
|
||||
hipModuleGetFunction(&Function, Module, kernel_name);
|
||||
checkHipErrors(hipModuleLoad(&Module, fileName));
|
||||
checkHipErrors(hipModuleGetFunction(&Function, Module, kernel_name));
|
||||
|
||||
void* args[2] = {&Ad, &Bd};
|
||||
|
||||
hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, args, nullptr);
|
||||
checkHipErrors(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, args, nullptr));
|
||||
|
||||
hipMemcpyDtoH(B, Bd, SIZE);
|
||||
checkHipErrors(hipMemcpyDtoH(B, Bd, SIZE));
|
||||
int mismatchCount = 0;
|
||||
for (uint32_t i = 0; i < LEN; i++) {
|
||||
if (A[i] != B[i]) {
|
||||
@@ -78,10 +79,10 @@ int main() {
|
||||
std::cout << "FAILED!\n";
|
||||
};
|
||||
|
||||
hipFree(Ad);
|
||||
hipFree(Bd);
|
||||
checkHipErrors(hipFree(Ad));
|
||||
checkHipErrors(hipFree(Bd));
|
||||
delete[] A;
|
||||
delete[] B;
|
||||
hipCtxDestroy(context);
|
||||
checkHipErrors(hipCtxDestroy(context));
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -25,6 +25,7 @@ THE SOFTWARE.
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include "hip_helper.h"
|
||||
|
||||
#ifdef __HIP_PLATFORM_AMD__
|
||||
#include <hip/hip_ext.h>
|
||||
@@ -36,12 +37,6 @@ THE SOFTWARE.
|
||||
#define fileName "vcpy_kernel.code"
|
||||
#define kernel_name "hello_world"
|
||||
|
||||
#define HIP_CHECK(status) \
|
||||
if (status != hipSuccess) { \
|
||||
std::cout << "Got Status: " << status << " at Line: " << __LINE__ << std::endl; \
|
||||
exit(0); \
|
||||
}
|
||||
|
||||
int main() {
|
||||
float *A, *B;
|
||||
hipDeviceptr_t Ad, Bd;
|
||||
@@ -56,18 +51,18 @@ int main() {
|
||||
hipInit(0);
|
||||
hipDevice_t device;
|
||||
hipCtx_t context;
|
||||
hipDeviceGet(&device, 0);
|
||||
hipCtxCreate(&context, 0, device);
|
||||
checkHipErrors(hipDeviceGet(&device, 0));
|
||||
checkHipErrors(hipCtxCreate(&context, 0, device));
|
||||
|
||||
hipMalloc((void**)&Ad, SIZE);
|
||||
hipMalloc((void**)&Bd, SIZE);
|
||||
checkHipErrors(hipMalloc((void**)&Ad, SIZE));
|
||||
checkHipErrors(hipMalloc((void**)&Bd, SIZE));
|
||||
|
||||
hipMemcpyHtoD(Ad, A, SIZE);
|
||||
hipMemcpyHtoD(Bd, B, SIZE);
|
||||
checkHipErrors(hipMemcpyHtoD(Ad, A, SIZE));
|
||||
checkHipErrors(hipMemcpyHtoD(Bd, B, SIZE));
|
||||
hipModule_t Module;
|
||||
hipFunction_t Function;
|
||||
HIP_CHECK(hipModuleLoad(&Module, fileName));
|
||||
HIP_CHECK(hipModuleGetFunction(&Function, Module, kernel_name));
|
||||
checkHipErrors(hipModuleLoad(&Module, fileName));
|
||||
checkHipErrors(hipModuleGetFunction(&Function, Module, kernel_name));
|
||||
|
||||
struct {
|
||||
void* _Ad;
|
||||
@@ -83,10 +78,10 @@ int main() {
|
||||
void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
|
||||
HIP_LAUNCH_PARAM_END};
|
||||
|
||||
HIP_CHECK(
|
||||
checkHipErrors(
|
||||
hipExtModuleLaunchKernel(Function, LEN, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config, 0));
|
||||
|
||||
hipMemcpyDtoH(B, Bd, SIZE);
|
||||
checkHipErrors(hipMemcpyDtoH(B, Bd, SIZE));
|
||||
|
||||
int mismatchCount = 0;
|
||||
for (uint32_t i = 0; i < LEN; i++) {
|
||||
@@ -102,10 +97,10 @@ int main() {
|
||||
std::cout << "FAILED!\n";
|
||||
};
|
||||
|
||||
hipFree(Ad);
|
||||
hipFree(Bd);
|
||||
checkHipErrors(hipFree(Ad));
|
||||
checkHipErrors(hipFree(Bd));
|
||||
delete[] A;
|
||||
delete[] B;
|
||||
hipCtxDestroy(context);
|
||||
checkHipErrors(hipCtxDestroy(context));
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -26,6 +26,7 @@ THE SOFTWARE.
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include <hip/hip_hcc.h>
|
||||
#include "hip_helper.h"
|
||||
|
||||
#define LEN 64
|
||||
#define SIZE LEN << 2
|
||||
@@ -33,12 +34,6 @@ THE SOFTWARE.
|
||||
#define fileName "vcpy_kernel.code"
|
||||
#define kernel_name "hello_world"
|
||||
|
||||
#define HIP_CHECK(status) \
|
||||
if (status != hipSuccess) { \
|
||||
std::cout << "Got Status: " << status << " at Line: " << __LINE__ << std::endl; \
|
||||
exit(0); \
|
||||
}
|
||||
|
||||
int main() {
|
||||
float *A, *B;
|
||||
hipDeviceptr_t Ad, Bd;
|
||||
@@ -53,18 +48,18 @@ int main() {
|
||||
hipInit(0);
|
||||
hipDevice_t device;
|
||||
hipCtx_t context;
|
||||
hipDeviceGet(&device, 0);
|
||||
hipCtxCreate(&context, 0, device);
|
||||
checkHipErrors(hipDeviceGet(&device, 0));
|
||||
checkHipErrors(hipCtxCreate(&context, 0, device));
|
||||
|
||||
hipMalloc((void**)&Ad, SIZE);
|
||||
hipMalloc((void**)&Bd, SIZE);
|
||||
checkHipErrors(hipMalloc((void**)&Ad, SIZE));
|
||||
checkHipErrors(hipMalloc((void**)&Bd, SIZE));
|
||||
|
||||
hipMemcpyHtoD(Ad, A, SIZE);
|
||||
hipMemcpyHtoD(Bd, B, SIZE);
|
||||
checkHipErrors(hipMemcpyHtoD(Ad, A, SIZE));
|
||||
checkHipErrors(hipMemcpyHtoD(Bd, B, SIZE));
|
||||
hipModule_t Module;
|
||||
hipFunction_t Function;
|
||||
HIP_CHECK(hipModuleLoad(&Module, fileName));
|
||||
HIP_CHECK(hipModuleGetFunction(&Function, Module, kernel_name));
|
||||
checkHipErrors(hipModuleLoad(&Module, fileName));
|
||||
checkHipErrors(hipModuleGetFunction(&Function, Module, kernel_name));
|
||||
|
||||
struct {
|
||||
void* _Ad;
|
||||
@@ -79,9 +74,9 @@ int main() {
|
||||
void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
|
||||
HIP_LAUNCH_PARAM_END};
|
||||
|
||||
HIP_CHECK(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config));
|
||||
checkHipErrors(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config));
|
||||
|
||||
hipMemcpyDtoH(B, Bd, SIZE);
|
||||
checkHipErrors(hipMemcpyDtoH(B, Bd, SIZE));
|
||||
|
||||
int mismatchCount = 0;
|
||||
for (uint32_t i = 0; i < LEN; i++) {
|
||||
@@ -97,10 +92,10 @@ int main() {
|
||||
std::cout << "FAILED!\n";
|
||||
};
|
||||
|
||||
hipFree(Ad);
|
||||
checkHipErrors(hipFree(Ad));
|
||||
hipFree(Bd);
|
||||
delete[] A;
|
||||
delete[] B;
|
||||
hipCtxDestroy(context);
|
||||
checkHipErrors(hipCtxDestroy(context));
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -50,5 +50,7 @@ add_custom_target(
|
||||
|
||||
add_dependencies(runKernel.hip.out codeobj)
|
||||
|
||||
target_include_directories(runKernel.hip.out PRIVATE ../../common)
|
||||
|
||||
# Link with HIP
|
||||
target_link_libraries(runKernel.hip.out hip::host)
|
||||
@@ -27,11 +27,12 @@ ifeq (,$(HIP_PATH))
|
||||
endif
|
||||
HIPCC=$(HIP_PATH)/bin/hipcc
|
||||
HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --compiler)
|
||||
INCLUDES := -I../../common
|
||||
|
||||
all: vcpy_kernel.code runKernel.hip.out
|
||||
|
||||
runKernel.hip.out: runKernel.cpp
|
||||
$(HIPCC) $(HIPCC_FLAGS) $< -o $@
|
||||
$(HIPCC) $(HIPCC_FLAGS) $(INCLUDES) $< -o $@
|
||||
|
||||
vcpy_kernel.code: vcpy_kernel.cpp
|
||||
$(HIPCC) --genco $(GENCO_FLAGS) $^ -o $@
|
||||
|
||||
@@ -31,7 +31,7 @@ THE SOFTWARE.
|
||||
#define SIZE LEN * sizeof(float)
|
||||
|
||||
#define fileName "vcpy_kernel.code"
|
||||
#define HIP_CHECK(cmd) \
|
||||
#define checkHipErrors(cmd) \
|
||||
{ \
|
||||
hipError_t status = cmd; \
|
||||
if (status != hipSuccess) { \
|
||||
@@ -64,23 +64,23 @@ int main() {
|
||||
hipMemcpyHtoD(hipDeviceptr_t(Ad), A, SIZE);
|
||||
hipMemcpyHtoD((hipDeviceptr_t)(Bd), B, SIZE);
|
||||
hipModule_t Module;
|
||||
HIP_CHECK(hipModuleLoad(&Module, fileName));
|
||||
checkHipErrors(hipModuleLoad(&Module, fileName));
|
||||
|
||||
float myDeviceGlobal_h = 42.0;
|
||||
float* deviceGlobal;
|
||||
size_t deviceGlobalSize;
|
||||
HIP_CHECK(hipModuleGetGlobal((void**)&deviceGlobal, &deviceGlobalSize, Module, "myDeviceGlobal"));
|
||||
HIP_CHECK(hipMemcpyHtoD(hipDeviceptr_t(deviceGlobal), &myDeviceGlobal_h, deviceGlobalSize));
|
||||
checkHipErrors(hipModuleGetGlobal((void**)&deviceGlobal, &deviceGlobalSize, Module, "myDeviceGlobal"));
|
||||
checkHipErrors(hipMemcpyHtoD(hipDeviceptr_t(deviceGlobal), &myDeviceGlobal_h, deviceGlobalSize));
|
||||
|
||||
#define ARRAY_SIZE 16
|
||||
|
||||
float myDeviceGlobalArray_h[ARRAY_SIZE];
|
||||
float *myDeviceGlobalArray;
|
||||
size_t myDeviceGlobalArraySize;
|
||||
HIP_CHECK(hipModuleGetGlobal((void**)&myDeviceGlobalArray, &myDeviceGlobalArraySize, Module, "myDeviceGlobalArray"));
|
||||
checkHipErrors(hipModuleGetGlobal((void**)&myDeviceGlobalArray, &myDeviceGlobalArraySize, Module, "myDeviceGlobalArray"));
|
||||
for (int i = 0; i < ARRAY_SIZE; i++) {
|
||||
myDeviceGlobalArray_h[i] = i * 1000.0f;
|
||||
HIP_CHECK(hipMemcpyHtoD(hipDeviceptr_t(myDeviceGlobalArray), &myDeviceGlobalArray_h, myDeviceGlobalArraySize));
|
||||
checkHipErrors(hipMemcpyHtoD(hipDeviceptr_t(myDeviceGlobalArray), &myDeviceGlobalArray_h, myDeviceGlobalArraySize));
|
||||
}
|
||||
|
||||
struct {
|
||||
@@ -98,8 +98,8 @@ int main() {
|
||||
|
||||
{
|
||||
hipFunction_t Function;
|
||||
HIP_CHECK(hipModuleGetFunction(&Function, Module, "hello_world"));
|
||||
HIP_CHECK(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config));
|
||||
checkHipErrors(hipModuleGetFunction(&Function, Module, "hello_world"));
|
||||
checkHipErrors(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config));
|
||||
|
||||
hipMemcpyDtoH(B, Bd, SIZE);
|
||||
|
||||
@@ -123,13 +123,13 @@ int main() {
|
||||
|
||||
{
|
||||
hipFunction_t Function;
|
||||
HIP_CHECK(hipModuleGetFunction(&Function, Module, "test_globals"));
|
||||
checkHipErrors(hipModuleGetFunction(&Function, Module, "test_globals"));
|
||||
int val =-1;
|
||||
HIP_CHECK(hipFuncGetAttribute(&val, HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES,Function));
|
||||
checkHipErrors(hipFuncGetAttribute(&val, HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES,Function));
|
||||
printf("Shared Size Bytes = %d\n",val);
|
||||
HIP_CHECK(hipFuncGetAttribute(&val, HIP_FUNC_ATTRIBUTE_NUM_REGS, Function));
|
||||
checkHipErrors(hipFuncGetAttribute(&val, HIP_FUNC_ATTRIBUTE_NUM_REGS, Function));
|
||||
printf("Num Regs = %d\n",val);
|
||||
HIP_CHECK(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config));
|
||||
checkHipErrors(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config));
|
||||
|
||||
hipMemcpyDtoH(B, Bd, SIZE);
|
||||
|
||||
|
||||
@@ -22,6 +22,8 @@ project(hipDispatchLatency)
|
||||
|
||||
cmake_minimum_required(VERSION 3.10)
|
||||
|
||||
include_directories(../../common)
|
||||
|
||||
if (NOT DEFINED ROCM_PATH )
|
||||
set ( ROCM_PATH "/opt/rocm" CACHE STRING "Default ROCM installation directory." )
|
||||
endif ()
|
||||
|
||||
@@ -26,8 +26,9 @@ ifeq (,$(HIP_PATH))
|
||||
HIP_PATH=../../..
|
||||
endif
|
||||
HIPCC=$(HIP_PATH)/bin/hipcc -std=c++11
|
||||
INCLUDES := -I../../common
|
||||
|
||||
CXXFLAGS = -O3
|
||||
CXXFLAGS = -O3 $(INCLUDES)
|
||||
|
||||
all: test_kernel.code hipDispatchLatency.out hipDispatchEnqueueRateMT.out
|
||||
|
||||
|
||||
@@ -22,6 +22,7 @@ THE SOFTWARE.
|
||||
#ifdef __HIP_PLATFORM_AMD__
|
||||
#include "hip/hip_ext.h"
|
||||
#endif
|
||||
#include "hip_helper.h"
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <chrono>
|
||||
@@ -41,16 +42,6 @@ THE SOFTWARE.
|
||||
#define failed(...) \
|
||||
abort();
|
||||
|
||||
#define HIPCHECK(error) \
|
||||
{ \
|
||||
hipError_t localError = error; \
|
||||
if ((localError != hipSuccess) && (localError != hipErrorPeerAccessAlreadyEnabled)) { \
|
||||
printf("error: '%s'(%d) from %s at %s:%d\n", hipGetErrorString(localError), \
|
||||
localError, #error, __FILE__, __LINE__); \
|
||||
failed("API returned error code."); \
|
||||
} \
|
||||
}
|
||||
|
||||
|
||||
__global__ void EmptyKernel() {}
|
||||
|
||||
@@ -87,12 +78,12 @@ void hipModuleLaunchKernel_enqueue_rate(const std::vector<char>& buffer, std::at
|
||||
{
|
||||
//resources necessary for this thread
|
||||
hipStream_t stream;
|
||||
HIPCHECK(hipStreamCreate(&stream));
|
||||
checkHipErrors(hipStreamCreate(&stream));
|
||||
hipModule_t module;
|
||||
hipFunction_t function;
|
||||
|
||||
HIPCHECK(hipModuleLoadData(&module, &buffer[0]));
|
||||
HIPCHECK(hipModuleGetFunction(&function, module, "test"));
|
||||
checkHipErrors(hipModuleLoadData(&module, &buffer[0]));
|
||||
checkHipErrors(hipModuleGetFunction(&function, module, "test"));
|
||||
|
||||
void* kernel_params = nullptr;
|
||||
std::array<float, TOTAL_RUN_COUNT> results;
|
||||
@@ -103,13 +94,13 @@ void hipModuleLaunchKernel_enqueue_rate(const std::vector<char>& buffer, std::at
|
||||
|
||||
for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) {
|
||||
auto start = std::chrono::high_resolution_clock::now();
|
||||
HIPCHECK(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, stream, &kernel_params, nullptr));
|
||||
checkHipErrors(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, stream, &kernel_params, nullptr));
|
||||
auto stop = std::chrono::high_resolution_clock::now();
|
||||
results[i] = std::chrono::duration<double, std::milli>(stop - start).count();
|
||||
}
|
||||
HIPCHECK(hipModuleUnload(module));
|
||||
checkHipErrors(hipModuleUnload(module));
|
||||
print_timing("Thread ID : " + std::to_string(tid) + " , " + "hipModuleLaunchKernel enqueue rate", results);
|
||||
HIPCHECK(hipStreamDestroy(stream));
|
||||
checkHipErrors(hipStreamDestroy(stream));
|
||||
}
|
||||
|
||||
// Measure time taken to enqueue a kernel on the GPU using hipLaunchKernelGGL
|
||||
@@ -117,7 +108,7 @@ void hipLaunchKernelGGL_enqueue_rate(const std::vector<char>& buffer, std::atomi
|
||||
{
|
||||
//resources necessary for this thread
|
||||
hipStream_t stream;
|
||||
HIPCHECK(hipStreamCreate(&stream));
|
||||
checkHipErrors(hipStreamCreate(&stream));
|
||||
std::array<float, TOTAL_RUN_COUNT> results;
|
||||
|
||||
//synchronize all threads, before running
|
||||
@@ -131,7 +122,7 @@ void hipLaunchKernelGGL_enqueue_rate(const std::vector<char>& buffer, std::atomi
|
||||
results[i] = std::chrono::duration<double, std::milli>(stop - start).count();
|
||||
}
|
||||
print_timing("Thread ID : " + std::to_string(tid) + " , " + "hipLaunchKernelGGL enqueue rate", results);
|
||||
HIPCHECK(hipStreamDestroy(stream));
|
||||
checkHipErrors(hipStreamDestroy(stream));
|
||||
}
|
||||
|
||||
// Simple thread pool
|
||||
|
||||
@@ -21,6 +21,7 @@ THE SOFTWARE.
|
||||
#ifdef __HIP_PLATFORM_AMD__
|
||||
#include "hip/hip_ext.h"
|
||||
#endif
|
||||
#include "hip_helper.h"
|
||||
#include <iostream>
|
||||
#include <chrono>
|
||||
#include <algorithm>
|
||||
@@ -66,19 +67,19 @@ void print_timing(std::string test, const std::array<float, TOTAL_RUN_COUNT> &re
|
||||
int main() {
|
||||
hipStream_t stream0 = 0;
|
||||
hipDevice_t device;
|
||||
hipDeviceGet(&device, 0);
|
||||
checkHipErrors(hipDeviceGet(&device, 0));
|
||||
hipCtx_t context;
|
||||
hipCtxCreate(&context, 0, device);
|
||||
checkHipErrors(hipCtxCreate(&context, 0, device));
|
||||
hipModule_t module;
|
||||
hipFunction_t function;
|
||||
hipModuleLoad(&module, FILE_NAME);
|
||||
hipModuleGetFunction(&function, module, KERNEL_NAME);
|
||||
checkHipErrors(hipModuleLoad(&module, FILE_NAME));
|
||||
checkHipErrors(hipModuleGetFunction(&function, module, KERNEL_NAME));
|
||||
void* params = nullptr;
|
||||
|
||||
std::array<float, TOTAL_RUN_COUNT> results;
|
||||
hipEvent_t start, stop;
|
||||
hipEventCreate(&start);
|
||||
hipEventCreate(&stop);
|
||||
checkHipErrors(hipEventCreate(&start));
|
||||
checkHipErrors(hipEventCreate(&stop));
|
||||
|
||||
/************************************************************************************/
|
||||
/* HIP kernel launch enqueue rate: */
|
||||
@@ -88,7 +89,7 @@ int main() {
|
||||
// Timing hipModuleLaunchKernel
|
||||
for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) {
|
||||
auto start = std::chrono::high_resolution_clock::now();
|
||||
hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0, ¶ms, nullptr);
|
||||
checkHipErrors(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0, ¶ms, nullptr));
|
||||
auto stop = std::chrono::high_resolution_clock::now();
|
||||
results[i] = std::chrono::duration<float, std::milli>(stop - start).count();
|
||||
}
|
||||
@@ -110,11 +111,11 @@ int main() {
|
||||
|
||||
//Timing around the dispatch
|
||||
for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) {
|
||||
hipEventRecord(start, 0);
|
||||
checkHipErrors(hipEventRecord(start, 0));
|
||||
hipLaunchKernelGGL((EmptyKernel), dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream0);
|
||||
hipEventRecord(stop, 0);
|
||||
hipEventSynchronize(stop);
|
||||
hipEventElapsedTime(&results[i], start, stop);
|
||||
checkHipErrors(hipEventRecord(stop, 0));
|
||||
checkHipErrors(hipEventSynchronize(stop));
|
||||
checkHipErrors(hipEventElapsedTime(&results[i], start, stop));
|
||||
}
|
||||
print_timing("Timing around single dispatch latency", results);
|
||||
|
||||
@@ -124,18 +125,18 @@ int main() {
|
||||
/*********************************************************************************/
|
||||
|
||||
for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) {
|
||||
hipEventRecord(start, 0);
|
||||
checkHipErrors(hipEventRecord(start, 0));
|
||||
for (int j = 0; j < BATCH_SIZE; j++) {
|
||||
hipLaunchKernelGGL((EmptyKernel), dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream0);
|
||||
}
|
||||
hipEventRecord(stop, 0);
|
||||
hipEventSynchronize(stop);
|
||||
hipEventElapsedTime(&results[i], start, stop);
|
||||
checkHipErrors(hipEventRecord(stop, 0));
|
||||
checkHipErrors(hipEventSynchronize(stop));
|
||||
checkHipErrors(hipEventElapsedTime(&results[i], start, stop));
|
||||
}
|
||||
print_timing("Batch dispatch latency", results, BATCH_SIZE);
|
||||
|
||||
hipEventDestroy(start);
|
||||
hipEventDestroy(stop);
|
||||
hipCtxDestroy(context);
|
||||
checkHipErrors(hipEventDestroy(start));
|
||||
checkHipErrors(hipEventDestroy(stop));
|
||||
checkHipErrors(hipCtxDestroy(context));
|
||||
}
|
||||
|
||||
|
||||
@@ -57,6 +57,8 @@ add_executable(hipInfo hipInfo.cpp)
|
||||
# Link with HIP
|
||||
target_link_libraries(hipInfo hip::host)
|
||||
|
||||
target_include_directories(hipInfo PRIVATE ../../common)
|
||||
|
||||
# Used only when make install is called
|
||||
# when hipInfo is built as part of compute project
|
||||
# hipInfo.exe will be installed to install/hip/bin path
|
||||
|
||||
@@ -26,13 +26,14 @@ ifeq (,$(HIP_PATH))
|
||||
HIP_PATH=../../..
|
||||
endif
|
||||
HIPCC=$(HIP_PATH)/bin/hipcc
|
||||
INCLUDES := -I../../common
|
||||
|
||||
EXE=hipInfo
|
||||
|
||||
all: install
|
||||
|
||||
$(EXE): hipInfo.cpp
|
||||
$(HIPCC) hipInfo.cpp -o $@
|
||||
$(HIPCC) hipInfo.cpp $(INCLUDES) -o $@
|
||||
|
||||
install: $(EXE)
|
||||
cp $(EXE) $(HIP_PATH)/bin
|
||||
|
||||
@@ -23,6 +23,7 @@ THE SOFTWARE.
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip_helper.h"
|
||||
|
||||
#define KNRM "\x1B[0m"
|
||||
#define KRED "\x1B[31m"
|
||||
@@ -33,20 +34,6 @@ THE SOFTWARE.
|
||||
#define KCYN "\x1B[36m"
|
||||
#define KWHT "\x1B[37m"
|
||||
|
||||
#define failed(...) \
|
||||
printf("%serror: ", KRED); \
|
||||
printf(__VA_ARGS__); \
|
||||
printf("\n"); \
|
||||
printf("error: TEST FAILED\n%s", KNRM); \
|
||||
exit(EXIT_FAILURE);
|
||||
|
||||
#define HIPCHECK(error) \
|
||||
if (error != hipSuccess) { \
|
||||
printf("%serror: '%s'(%d) at %s:%d%s\n", KRED, hipGetErrorString(error), error, __FILE__, \
|
||||
__LINE__, KNRM); \
|
||||
failed("API returned error code."); \
|
||||
}
|
||||
|
||||
void printCompilerInfo() {
|
||||
#ifdef __NVCC__
|
||||
printf("compiler: nvcc\n");
|
||||
@@ -76,7 +63,7 @@ void printDeviceProp(int deviceId) {
|
||||
cout << setw(w1) << "device#" << deviceId << endl;
|
||||
|
||||
hipDeviceProp_t props = {0};
|
||||
HIPCHECK(hipGetDeviceProperties(&props, deviceId));
|
||||
checkHipErrors(hipGetDeviceProperties(&props, deviceId));
|
||||
|
||||
cout << setw(w1) << "Name: " << props.name << endl;
|
||||
cout << setw(w1) << "pciBusID: " << props.pciBusID << endl;
|
||||
@@ -149,11 +136,11 @@ void printDeviceProp(int deviceId) {
|
||||
cout << setw(w1) << "gcnArchName: " << props.gcnArchName << endl;
|
||||
#endif
|
||||
int deviceCnt;
|
||||
hipGetDeviceCount(&deviceCnt);
|
||||
checkHipErrors(hipGetDeviceCount(&deviceCnt));
|
||||
cout << setw(w1) << "peers: ";
|
||||
for (int i = 0; i < deviceCnt; i++) {
|
||||
int isPeer;
|
||||
hipDeviceCanAccessPeer(&isPeer, i, deviceId);
|
||||
checkHipErrors(hipDeviceCanAccessPeer(&isPeer, i, deviceId));
|
||||
if (isPeer) {
|
||||
cout << "device#" << i << " ";
|
||||
}
|
||||
@@ -162,7 +149,7 @@ void printDeviceProp(int deviceId) {
|
||||
cout << setw(w1) << "non-peers: ";
|
||||
for (int i = 0; i < deviceCnt; i++) {
|
||||
int isPeer;
|
||||
hipDeviceCanAccessPeer(&isPeer, i, deviceId);
|
||||
checkHipErrors(hipDeviceCanAccessPeer(&isPeer, i, deviceId));
|
||||
if (!isPeer) {
|
||||
cout << "device#" << i << " ";
|
||||
}
|
||||
@@ -185,7 +172,7 @@ void printDeviceProp(int deviceId) {
|
||||
|
||||
|
||||
size_t free, total;
|
||||
hipMemGetInfo(&free, &total);
|
||||
checkHipErrors(hipMemGetInfo(&free, &total));
|
||||
|
||||
cout << fixed << setprecision(2);
|
||||
cout << setw(w1) << "memInfo.total: " << bytesToGB(total) << " GB" << endl;
|
||||
@@ -202,10 +189,10 @@ int main(int argc, char* argv[]) {
|
||||
|
||||
int deviceCnt;
|
||||
|
||||
HIPCHECK(hipGetDeviceCount(&deviceCnt));
|
||||
checkHipErrors(hipGetDeviceCount(&deviceCnt));
|
||||
|
||||
for (int i = 0; i < deviceCnt; i++) {
|
||||
hipSetDevice(i);
|
||||
checkHipErrors(hipSetDevice(i));
|
||||
printDeviceProp(i);
|
||||
}
|
||||
|
||||
|
||||
@@ -40,5 +40,7 @@ set(CMAKE_BUILD_TYPE Release)
|
||||
# Create the excutable
|
||||
add_executable(MatrixTranspose MatrixTranspose.cpp)
|
||||
|
||||
target_include_directories(MatrixTranspose PRIVATE ../../common)
|
||||
|
||||
# Link with HIP
|
||||
target_link_libraries(MatrixTranspose hip::host)
|
||||
|
||||
@@ -30,6 +30,7 @@ HIPCC=$(HIP_PATH)/bin/hipcc
|
||||
|
||||
TARGET=hcc
|
||||
|
||||
INCLUDES := -I../../common
|
||||
SOURCES = MatrixTranspose.cpp
|
||||
OBJECTS = $(SOURCES:.cpp=.o)
|
||||
|
||||
@@ -40,7 +41,7 @@ EXECUTABLE=./MatrixTranspose
|
||||
|
||||
all: $(EXECUTABLE) test
|
||||
|
||||
CXXFLAGS =-g
|
||||
CXXFLAGS =-g $(INCLUDES)
|
||||
CXX=$(HIPCC)
|
||||
|
||||
|
||||
|
||||
@@ -24,6 +24,7 @@ THE SOFTWARE.
|
||||
|
||||
// hip header file
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip_helper.h"
|
||||
|
||||
|
||||
#define WIDTH 1024
|
||||
@@ -61,7 +62,7 @@ int main() {
|
||||
float* gpuTransposeMatrix;
|
||||
|
||||
hipDeviceProp_t devProp;
|
||||
hipGetDeviceProperties(&devProp, 0);
|
||||
checkHipErrors(hipGetDeviceProperties(&devProp, 0));
|
||||
|
||||
std::cout << "Device name " << devProp.name << std::endl;
|
||||
|
||||
@@ -78,11 +79,11 @@ int main() {
|
||||
}
|
||||
|
||||
// allocate the memory on the device side
|
||||
hipMalloc((void**)&gpuMatrix, NUM * sizeof(float));
|
||||
hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float));
|
||||
checkHipErrors(hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)));
|
||||
checkHipErrors(hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)));
|
||||
|
||||
// Memory transfer from host to device
|
||||
hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice);
|
||||
checkHipErrors(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice));
|
||||
|
||||
// Lauching kernel from host
|
||||
hipLaunchKernelGGL(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y),
|
||||
@@ -90,7 +91,7 @@ int main() {
|
||||
gpuMatrix, WIDTH);
|
||||
|
||||
// Memory transfer from device to host
|
||||
hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost);
|
||||
checkHipErrors(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost));
|
||||
|
||||
// CPU MatrixTranspose computation
|
||||
matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
|
||||
@@ -110,8 +111,8 @@ int main() {
|
||||
}
|
||||
|
||||
// free the resources on device side
|
||||
hipFree(gpuMatrix);
|
||||
hipFree(gpuTransposeMatrix);
|
||||
checkHipErrors(hipFree(gpuMatrix));
|
||||
checkHipErrors(hipFree(gpuTransposeMatrix));
|
||||
|
||||
// free the resources on host side
|
||||
free(Matrix);
|
||||
|
||||
@@ -40,5 +40,7 @@ set(CMAKE_BUILD_TYPE Release)
|
||||
# Create the excutable
|
||||
add_executable(inline_asm inline_asm.cpp)
|
||||
|
||||
target_include_directories(inline_asm PRIVATE ../../common)
|
||||
|
||||
# Link with HIP
|
||||
target_link_libraries(inline_asm hip::host)
|
||||
|
||||
@@ -32,7 +32,7 @@ TARGET=hcc
|
||||
|
||||
SOURCES = inline_asm.cpp
|
||||
OBJECTS = $(SOURCES:.cpp=.o)
|
||||
|
||||
INCLUDES := -I../../common
|
||||
EXECUTABLE=./inline_asm
|
||||
|
||||
.PHONY: test
|
||||
@@ -40,7 +40,7 @@ EXECUTABLE=./inline_asm
|
||||
|
||||
all: $(EXECUTABLE) test
|
||||
|
||||
CXXFLAGS =-g
|
||||
CXXFLAGS =-g $(INCLUDES)
|
||||
CXX=$(HIPCC)
|
||||
|
||||
|
||||
|
||||
@@ -24,6 +24,7 @@ THE SOFTWARE.
|
||||
|
||||
// hip header file
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip_helper.h"
|
||||
|
||||
#define WIDTH 1024
|
||||
|
||||
@@ -59,13 +60,13 @@ int main() {
|
||||
float* gpuTransposeMatrix;
|
||||
|
||||
hipDeviceProp_t devProp;
|
||||
hipGetDeviceProperties(&devProp, 0);
|
||||
checkHipErrors(hipGetDeviceProperties(&devProp, 0));
|
||||
|
||||
std::cout << "Device name " << devProp.name << std::endl;
|
||||
|
||||
hipEvent_t start, stop;
|
||||
hipEventCreate(&start);
|
||||
hipEventCreate(&stop);
|
||||
checkHipErrors(hipEventCreate(&start));
|
||||
checkHipErrors(hipEventCreate(&stop));
|
||||
float eventMs = 1.0f;
|
||||
|
||||
int i;
|
||||
@@ -81,25 +82,25 @@ int main() {
|
||||
}
|
||||
|
||||
// allocate the memory on the device side
|
||||
hipMalloc((void**)&gpuMatrix, NUM * sizeof(float));
|
||||
hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float));
|
||||
checkHipErrors(hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)));
|
||||
checkHipErrors(hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)));
|
||||
|
||||
// Record the start event
|
||||
hipEventRecord(start, NULL);
|
||||
checkHipErrors(hipEventRecord(start, NULL));
|
||||
|
||||
// Memory transfer from host to device
|
||||
hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice);
|
||||
checkHipErrors(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice));
|
||||
|
||||
// Record the stop event
|
||||
hipEventRecord(stop, NULL);
|
||||
hipEventSynchronize(stop);
|
||||
checkHipErrors(hipEventRecord(stop, NULL));
|
||||
checkHipErrors(hipEventSynchronize(stop));
|
||||
|
||||
hipEventElapsedTime(&eventMs, start, stop);
|
||||
checkHipErrors(hipEventElapsedTime(&eventMs, start, stop));
|
||||
|
||||
printf("hipMemcpyHostToDevice time taken = %6.3fms\n", eventMs);
|
||||
|
||||
// Record the start event
|
||||
hipEventRecord(start, NULL);
|
||||
checkHipErrors(hipEventRecord(start, NULL));
|
||||
|
||||
// Lauching kernel from host
|
||||
hipLaunchKernelGGL(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y),
|
||||
@@ -107,24 +108,24 @@ int main() {
|
||||
gpuMatrix, WIDTH);
|
||||
|
||||
// Record the stop event
|
||||
hipEventRecord(stop, NULL);
|
||||
hipEventSynchronize(stop);
|
||||
checkHipErrors(hipEventRecord(stop, NULL));
|
||||
checkHipErrors(hipEventSynchronize(stop));
|
||||
|
||||
hipEventElapsedTime(&eventMs, start, stop);
|
||||
checkHipErrors(hipEventElapsedTime(&eventMs, start, stop));
|
||||
|
||||
printf("kernel Execution time = %6.3fms\n", eventMs);
|
||||
|
||||
// Record the start event
|
||||
hipEventRecord(start, NULL);
|
||||
checkHipErrors(hipEventRecord(start, NULL));
|
||||
|
||||
// Memory transfer from device to host
|
||||
hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost);
|
||||
checkHipErrors(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost));
|
||||
|
||||
// Record the stop event
|
||||
hipEventRecord(stop, NULL);
|
||||
hipEventSynchronize(stop);
|
||||
checkHipErrors(hipEventRecord(stop, NULL));
|
||||
checkHipErrors(hipEventSynchronize(stop));
|
||||
|
||||
hipEventElapsedTime(&eventMs, start, stop);
|
||||
checkHipErrors(hipEventElapsedTime(&eventMs, start, stop));
|
||||
|
||||
printf("hipMemcpyDeviceToHost time taken = %6.3fms\n", eventMs);
|
||||
|
||||
@@ -147,8 +148,8 @@ int main() {
|
||||
}
|
||||
|
||||
// free the resources on device side
|
||||
hipFree(gpuMatrix);
|
||||
hipFree(gpuTransposeMatrix);
|
||||
checkHipErrors(hipFree(gpuMatrix));
|
||||
checkHipErrors(hipFree(gpuTransposeMatrix));
|
||||
|
||||
// free the resources on host side
|
||||
free(Matrix);
|
||||
|
||||
@@ -50,5 +50,7 @@ add_custom_target(
|
||||
|
||||
add_dependencies(texture2dDrv codeobj)
|
||||
|
||||
target_include_directories(texture2dDrv PRIVATE ../../common)
|
||||
|
||||
# Link with HIP
|
||||
target_link_libraries(texture2dDrv hip::host)
|
||||
|
||||
@@ -27,11 +27,12 @@ ifeq (,$(HIP_PATH))
|
||||
endif
|
||||
HIPCC=$(HIP_PATH)/bin/hipcc
|
||||
HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --compiler)
|
||||
INCLUDES := -I../../common
|
||||
|
||||
all: tex2dKernel.code texture2dDrv.out
|
||||
|
||||
texture2dDrv.out: texture2dDrv.cpp
|
||||
$(HIPCC) $(HIPCC_FLAGS) $< -o $@
|
||||
$(HIPCC) $(HIPCC_FLAGS) $(INCLUDES) $< -o $@
|
||||
|
||||
tex2dKernel.code: tex2dKernel.cpp
|
||||
$(HIPCC) --genco $(GENCO_FLAGS) $^ -o $@
|
||||
|
||||
@@ -24,21 +24,12 @@ THE SOFTWARE.
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include "hip_helper.h"
|
||||
|
||||
#define fileName "tex2dKernel.code"
|
||||
|
||||
bool testResult = true;
|
||||
|
||||
#define HIP_CHECK(cmd) \
|
||||
{ \
|
||||
hipError_t status = cmd; \
|
||||
if (status != hipSuccess) { \
|
||||
std::cout << "error: #" << status << " (" << hipGetErrorString(status) \
|
||||
<< ") at line:" << __LINE__ << ": " << #cmd << std::endl; \
|
||||
abort(); \
|
||||
} \
|
||||
}
|
||||
|
||||
template<typename T,
|
||||
typename std::enable_if<std::is_arithmetic<T>::value>::type *t = nullptr>
|
||||
static inline hipArray_Format getArrayFormat() {
|
||||
@@ -154,11 +145,11 @@ bool runTest(hipModule_t &module, const char *refName, const char *funcName) {
|
||||
|
||||
hipChannelFormatDesc channelDesc = hipCreateChannelDesc<T>();
|
||||
hipArray_t array;
|
||||
HIP_CHECK(hipMallocArray(&array, &channelDesc, width, height));
|
||||
checkHipErrors(hipMallocArray(&array, &channelDesc, width, height));
|
||||
|
||||
const size_t spitch = width * sizeof(T);
|
||||
|
||||
HIP_CHECK(hipMemcpy2DToArray(array, 0, 0, hData, spitch, width * sizeof(T),
|
||||
checkHipErrors(hipMemcpy2DToArray(array, 0, 0, hData, spitch, width * sizeof(T),
|
||||
height, hipMemcpyHostToDevice));
|
||||
|
||||
hipResourceDesc resDesc;
|
||||
@@ -175,10 +166,10 @@ bool runTest(hipModule_t &module, const char *refName, const char *funcName) {
|
||||
texDesc.normalizedCoords = 0;
|
||||
|
||||
hipTextureObject_t texObj;
|
||||
HIP_CHECK(hipCreateTextureObject(&texObj, &resDesc, &texDesc, nullptr));
|
||||
checkHipErrors(hipCreateTextureObject(&texObj, &resDesc, &texDesc, nullptr));
|
||||
|
||||
T *dData = NULL;
|
||||
HIP_CHECK(hipMalloc((void** )&dData, size));
|
||||
checkHipErrors(hipMalloc((void** )&dData, size));
|
||||
|
||||
struct {
|
||||
void *_Ad;
|
||||
@@ -197,18 +188,18 @@ bool runTest(hipModule_t &module, const char *refName, const char *funcName) {
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE, &sizeTemp, HIP_LAUNCH_PARAM_END };
|
||||
|
||||
hipFunction_t Function;
|
||||
HIP_CHECK(hipModuleGetFunction(&Function, module, funcName));
|
||||
checkHipErrors(hipModuleGetFunction(&Function, module, funcName));
|
||||
|
||||
int temp1 = width / 16;
|
||||
int temp2 = height / 16;
|
||||
HIP_CHECK(
|
||||
checkHipErrors(
|
||||
hipModuleLaunchKernel(Function, 16, 16, 1, temp1, temp2, 1, 0, 0, NULL,
|
||||
(void** )&config));
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
checkHipErrors(hipDeviceSynchronize());
|
||||
|
||||
T *hOutputData = (T*) malloc(size);
|
||||
memset(hOutputData, 0, size);
|
||||
HIP_CHECK(hipMemcpy(hOutputData, dData, size, hipMemcpyDeviceToHost));
|
||||
checkHipErrors(hipMemcpy(hOutputData, dData, size, hipMemcpyDeviceToHost));
|
||||
|
||||
for (int i = 0; i < height; i++) {
|
||||
for (int j = 0; j < width; j++) {
|
||||
@@ -219,9 +210,9 @@ bool runTest(hipModule_t &module, const char *refName, const char *funcName) {
|
||||
}
|
||||
}
|
||||
}
|
||||
HIP_CHECK(hipDestroyTextureObject(texObj));
|
||||
HIP_CHECK(hipFree(dData));
|
||||
HIP_CHECK(hipFreeArray(array));
|
||||
checkHipErrors(hipDestroyTextureObject(texObj));
|
||||
checkHipErrors(hipFree(dData));
|
||||
checkHipErrors(hipFreeArray(array));
|
||||
free(hOutputData);
|
||||
free(hData);
|
||||
printf("%s test %s ...\n", funcName, testResult ? "PASSED" : "FAILED");
|
||||
@@ -231,7 +222,7 @@ bool runTest(hipModule_t &module, const char *refName, const char *funcName) {
|
||||
inline bool isImageSupported() {
|
||||
int imageSupport = 1;
|
||||
#ifdef __HIP_PLATFORM_AMD__
|
||||
HIP_CHECK(hipDeviceGetAttribute(&imageSupport, hipDeviceAttributeImageSupport,
|
||||
checkHipErrors(hipDeviceGetAttribute(&imageSupport, hipDeviceAttributeImageSupport,
|
||||
0));
|
||||
#endif
|
||||
return imageSupport != 0;
|
||||
@@ -242,10 +233,10 @@ int main(int argc, char** argv) {
|
||||
printf("Texture is not support on the device. Skipped.\n");
|
||||
return 0;
|
||||
}
|
||||
HIP_CHECK(hipInit(0));
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
checkHipErrors(hipInit(0));
|
||||
checkHipErrors(hipSetDevice(0));
|
||||
hipModule_t module;
|
||||
HIP_CHECK(hipModuleLoad(&module, fileName));
|
||||
checkHipErrors(hipModuleLoad(&module, fileName));
|
||||
testResult = testResult && runTest<char>(module, "texChar", "tex2dKernelChar");
|
||||
testResult = testResult && runTest<short>(module, "texShort", "tex2dKernelShort");
|
||||
testResult = testResult && runTest<int>(module, "texInt", "tex2dKernelInt");
|
||||
@@ -255,7 +246,7 @@ int main(int argc, char** argv) {
|
||||
testResult = testResult && runTest<int4>(module, "texInt4", "tex2dKernelInt4");
|
||||
testResult = testResult && runTest<float4>(module, "texFloat4", "tex2dKernelFloat4");
|
||||
|
||||
HIP_CHECK(hipModuleUnload(module));
|
||||
checkHipErrors(hipModuleUnload(module));
|
||||
printf("texture2dDrv %s ...\n", testResult ? "PASSED" : "FAILED");
|
||||
return testResult ? EXIT_SUCCESS : EXIT_FAILURE;
|
||||
}
|
||||
|
||||
@@ -51,6 +51,8 @@ set(MY_NVCC_OPTIONS)
|
||||
set_source_files_properties(${MY_SOURCE_FILES} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
|
||||
hip_add_executable(${MY_TARGET_NAME} ${MY_SOURCE_FILES} HIPCC_OPTIONS ${MY_HIPCC_OPTIONS} CLANG_OPTIONS ${MY_CLANG_OPTIONS} NVCC_OPTIONS ${MY_NVCC_OPTIONS})
|
||||
|
||||
target_include_directories(${MY_TARGET_NAME} PRIVATE ../../common)
|
||||
|
||||
# Search for rocm in common locations
|
||||
list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}/hip ${ROCM_PATH})
|
||||
find_package(hip QUIET CONFIG)
|
||||
|
||||
@@ -24,6 +24,7 @@ THE SOFTWARE.
|
||||
|
||||
// hip header file
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip_helper.h"
|
||||
|
||||
|
||||
#define WIDTH 1024
|
||||
@@ -61,7 +62,7 @@ int main() {
|
||||
float* gpuTransposeMatrix;
|
||||
|
||||
hipDeviceProp_t devProp;
|
||||
hipGetDeviceProperties(&devProp, 0);
|
||||
checkHipErrors(hipGetDeviceProperties(&devProp, 0));
|
||||
|
||||
std::cout << "Device name " << devProp.name << std::endl;
|
||||
|
||||
@@ -78,11 +79,11 @@ int main() {
|
||||
}
|
||||
|
||||
// allocate the memory on the device side
|
||||
hipMalloc((void**)&gpuMatrix, NUM * sizeof(float));
|
||||
hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float));
|
||||
checkHipErrors(hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)));
|
||||
checkHipErrors(hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)));
|
||||
|
||||
// Memory transfer from host to device
|
||||
hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice);
|
||||
checkHipErrors(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice));
|
||||
|
||||
// Lauching kernel from host
|
||||
hipLaunchKernelGGL(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y),
|
||||
@@ -90,7 +91,7 @@ int main() {
|
||||
gpuMatrix, WIDTH);
|
||||
|
||||
// Memory transfer from device to host
|
||||
hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost);
|
||||
checkHipErrors(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost));
|
||||
|
||||
// CPU MatrixTranspose computation
|
||||
matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
|
||||
@@ -110,8 +111,8 @@ int main() {
|
||||
}
|
||||
|
||||
// free the resources on device side
|
||||
hipFree(gpuMatrix);
|
||||
hipFree(gpuTransposeMatrix);
|
||||
checkHipErrors(hipFree(gpuMatrix));
|
||||
checkHipErrors(hipFree(gpuTransposeMatrix));
|
||||
|
||||
// free the resources on host side
|
||||
free(Matrix);
|
||||
|
||||
@@ -40,5 +40,7 @@ set(CMAKE_BUILD_TYPE Release)
|
||||
# Create the excutable
|
||||
add_executable(occupancy occupancy.cpp)
|
||||
|
||||
target_include_directories(occupancy PRIVATE ../../common)
|
||||
|
||||
# Link with HIP
|
||||
target_link_libraries(occupancy hip::host)
|
||||
|
||||
@@ -26,7 +26,7 @@ ifeq (,$(HIP_PATH))
|
||||
HIP_PATH=../../..
|
||||
endif
|
||||
HIPCC=$(HIP_PATH)/bin/hipcc
|
||||
|
||||
INCLUDES := -I../../common
|
||||
EXE=./occupancy
|
||||
|
||||
.PHONY: test
|
||||
@@ -34,7 +34,7 @@ EXE=./occupancy
|
||||
all: test
|
||||
|
||||
$(EXE): occupancy.cpp
|
||||
$(HIPCC) $^ -o $@
|
||||
$(HIPCC) $(INCLUDES) $^ -o $@
|
||||
|
||||
test: $(EXE)
|
||||
$(EXE)
|
||||
|
||||
@@ -19,14 +19,9 @@ THE SOFTWARE.
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include <iostream>
|
||||
#include "hip_helper.h"
|
||||
#define NUM 1000000
|
||||
|
||||
#define HIP_CHECK(status) \
|
||||
if (status != hipSuccess) { \
|
||||
std::cout << "Got Status: " << status << " at Line: " << __LINE__ << std::endl; \
|
||||
exit(0); \
|
||||
}
|
||||
|
||||
// Device (Kernel) function
|
||||
__global__ void multiply(float* C, float* A, float* B, int N){
|
||||
|
||||
@@ -47,11 +42,11 @@ void multiplyCPU(float* C, float* A, float* B, int N){
|
||||
void launchKernel(float* C, float* A, float* B, bool manual){
|
||||
|
||||
hipDeviceProp_t devProp;
|
||||
HIP_CHECK(hipGetDeviceProperties(&devProp, 0));
|
||||
checkHipErrors(hipGetDeviceProperties(&devProp, 0));
|
||||
|
||||
hipEvent_t start, stop;
|
||||
HIP_CHECK(hipEventCreate(&start));
|
||||
HIP_CHECK(hipEventCreate(&stop));
|
||||
checkHipErrors(hipEventCreate(&start));
|
||||
checkHipErrors(hipEventCreate(&stop));
|
||||
float eventMs = 1.0f;
|
||||
const unsigned threadsperblock = 32;
|
||||
const unsigned blocks = (NUM/threadsperblock)+1;
|
||||
@@ -66,28 +61,28 @@ void launchKernel(float* C, float* A, float* B, bool manual){
|
||||
std::cout << std::endl << "Manual Configuration with block size " << blockSize << std::endl;
|
||||
}
|
||||
else{
|
||||
HIP_CHECK(hipOccupancyMaxPotentialBlockSize(&mingridSize, &blockSize, multiply, 0, 0));
|
||||
checkHipErrors(hipOccupancyMaxPotentialBlockSize(&mingridSize, &blockSize, multiply, 0, 0));
|
||||
std::cout << std::endl << "Automatic Configuation based on hipOccupancyMaxPotentialBlockSize " << std::endl;
|
||||
std::cout << "Suggested blocksize is " << blockSize << ", Minimum gridsize is " << mingridSize << std::endl;
|
||||
gridSize = (NUM/blockSize)+1;
|
||||
}
|
||||
|
||||
// Record the start event
|
||||
HIP_CHECK(hipEventRecord(start, NULL));
|
||||
checkHipErrors(hipEventRecord(start, NULL));
|
||||
|
||||
// Launching the Kernel from Host
|
||||
hipLaunchKernelGGL(multiply, dim3(gridSize), dim3(blockSize), 0, 0, C, A, B, NUM);
|
||||
|
||||
// Record the stop event
|
||||
HIP_CHECK(hipEventRecord(stop, NULL));
|
||||
HIP_CHECK(hipEventSynchronize(stop));
|
||||
checkHipErrors(hipEventRecord(stop, NULL));
|
||||
checkHipErrors(hipEventSynchronize(stop));
|
||||
|
||||
HIP_CHECK(hipEventElapsedTime(&eventMs, start, stop));
|
||||
checkHipErrors(hipEventElapsedTime(&eventMs, start, stop));
|
||||
printf("kernel Execution time = %6.3fms\n", eventMs);
|
||||
|
||||
//Calculate Occupancy
|
||||
int numBlock = 0;
|
||||
HIP_CHECK(hipOccupancyMaxActiveBlocksPerMultiprocessor(&numBlock, multiply, blockSize, 0));
|
||||
checkHipErrors(hipOccupancyMaxActiveBlocksPerMultiprocessor(&numBlock, multiply, blockSize, 0));
|
||||
|
||||
if(devProp.maxThreadsPerMultiProcessor){
|
||||
std::cout << "Theoretical Occupancy is " << (double)numBlock* blockSize/devProp.maxThreadsPerMultiProcessor * 100 << "%" << std::endl;
|
||||
@@ -113,14 +108,14 @@ int main() {
|
||||
}
|
||||
|
||||
// allocate the memory on the device side
|
||||
HIP_CHECK(hipMalloc((void**)&Ad, NUM * sizeof(float)));
|
||||
HIP_CHECK(hipMalloc((void**)&Bd, NUM * sizeof(float)));
|
||||
HIP_CHECK(hipMalloc((void**)&C0d, NUM * sizeof(float)));
|
||||
HIP_CHECK(hipMalloc((void**)&C1d, NUM * sizeof(float)));
|
||||
checkHipErrors(hipMalloc((void**)&Ad, NUM * sizeof(float)));
|
||||
checkHipErrors(hipMalloc((void**)&Bd, NUM * sizeof(float)));
|
||||
checkHipErrors(hipMalloc((void**)&C0d, NUM * sizeof(float)));
|
||||
checkHipErrors(hipMalloc((void**)&C1d, NUM * sizeof(float)));
|
||||
|
||||
// Memory transfer from host to device
|
||||
HIP_CHECK(hipMemcpy(Ad,A,NUM * sizeof(float), hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipMemcpy(Bd,B,NUM * sizeof(float), hipMemcpyHostToDevice));
|
||||
checkHipErrors(hipMemcpy(Ad,A,NUM * sizeof(float), hipMemcpyHostToDevice));
|
||||
checkHipErrors(hipMemcpy(Bd,B,NUM * sizeof(float), hipMemcpyHostToDevice));
|
||||
|
||||
//Kernel launch with manual/default block size
|
||||
launchKernel(C0d, Ad, Bd, 1);
|
||||
@@ -129,8 +124,8 @@ int main() {
|
||||
launchKernel(C1d, Ad, Bd, 0);
|
||||
|
||||
// Memory transfer from device to host
|
||||
HIP_CHECK(hipMemcpy(C0,C0d, NUM * sizeof(float), hipMemcpyDeviceToHost));
|
||||
HIP_CHECK(hipMemcpy(C1,C1d, NUM * sizeof(float), hipMemcpyDeviceToHost));
|
||||
checkHipErrors(hipMemcpy(C0,C0d, NUM * sizeof(float), hipMemcpyDeviceToHost));
|
||||
checkHipErrors(hipMemcpy(C1,C1d, NUM * sizeof(float), hipMemcpyDeviceToHost));
|
||||
|
||||
// CPU computation
|
||||
multiplyCPU(cpuC, A, B, NUM);
|
||||
@@ -163,10 +158,10 @@ int main() {
|
||||
printf("\nAutomatic Test PASSED!\n");
|
||||
}
|
||||
|
||||
HIP_CHECK(hipFree(Ad));
|
||||
HIP_CHECK(hipFree(Bd));
|
||||
HIP_CHECK(hipFree(C0d));
|
||||
HIP_CHECK(hipFree(C1d));
|
||||
checkHipErrors(hipFree(Ad));
|
||||
checkHipErrors(hipFree(Bd));
|
||||
checkHipErrors(hipFree(C0d));
|
||||
checkHipErrors(hipFree(C1d));
|
||||
|
||||
free(A);
|
||||
free(B);
|
||||
|
||||
@@ -40,5 +40,7 @@ set(CMAKE_BUILD_TYPE Release)
|
||||
# Create the excutable
|
||||
add_executable(gpuarch gpuarch.cpp)
|
||||
|
||||
target_include_directories(gpuarch PRIVATE ../../common)
|
||||
|
||||
# Link with HIP
|
||||
target_link_libraries(gpuarch hip::host)
|
||||
|
||||
@@ -26,7 +26,7 @@ ifeq (,$(HIP_PATH))
|
||||
HIP_PATH=../../..
|
||||
endif
|
||||
HIPCC=$(HIP_PATH)/bin/hipcc
|
||||
|
||||
INCLUDES := -I../../common
|
||||
EXE=./gpuarch
|
||||
|
||||
.PHONY: test
|
||||
@@ -34,7 +34,7 @@ EXE=./gpuarch
|
||||
all: test
|
||||
|
||||
$(EXE): gpuarch.cpp
|
||||
$(HIPCC) $^ -o $@
|
||||
$(HIPCC) $(INCLUDES) $^ -o $@
|
||||
|
||||
test: $(EXE)
|
||||
$(EXE)
|
||||
|
||||
@@ -25,12 +25,6 @@ THE SOFTWARE.
|
||||
#define SIZE (BLOCKS_PER_GRID * THREADS_PER_BLOCK)
|
||||
#define NOT_SUPPORTED -99 // dummy number indicates unsupported operation
|
||||
|
||||
#define HIP_STATUS_CHECK(status) \
|
||||
if (status != hipSuccess) { \
|
||||
std::cout << "Got Status: " << status << " at Line: " << __LINE__ << std::endl; \
|
||||
exit(0); \
|
||||
}
|
||||
|
||||
// Using __gfx*__ macro one can have GPU architecture specific code flow
|
||||
// For example: If below kernel runs on gfx908 it will increment 'in' by 'value' and store into
|
||||
// 'out'
|
||||
@@ -57,8 +51,8 @@ int main() {
|
||||
int32_t* hInput = static_cast<int32_t*>(malloc(NBytes));
|
||||
int32_t* hOutput = static_cast<int32_t*>(malloc(NBytes));
|
||||
|
||||
HIP_STATUS_CHECK(hipMalloc(&dInput, NBytes));
|
||||
HIP_STATUS_CHECK(hipMalloc(&dOutput, NBytes));
|
||||
checkHipErrors(hipMalloc(&dInput, NBytes));
|
||||
checkHipErrors(hipMalloc(&dOutput, NBytes));
|
||||
|
||||
// Initialize host input/output buffers
|
||||
for (int i = 0; i < SIZE; ++i) {
|
||||
@@ -67,14 +61,14 @@ int main() {
|
||||
}
|
||||
|
||||
// Initialize device input buffer
|
||||
HIP_STATUS_CHECK(hipMemcpy(dInput, hInput, NBytes, hipMemcpyHostToDevice));
|
||||
checkHipErrors(hipMemcpy(dInput, hInput, NBytes, hipMemcpyHostToDevice));
|
||||
|
||||
// Launch kernel
|
||||
hipLaunchKernelGGL(incrementKernel, dim3(BLOCKS_PER_GRID), dim3(THREADS_PER_BLOCK), 0, 0, dInput,
|
||||
dOutput, incrementValue, SIZE);
|
||||
|
||||
// Copy result back to host buffer
|
||||
HIP_STATUS_CHECK(hipMemcpy(hOutput, dOutput, NBytes, hipMemcpyDeviceToHost));
|
||||
checkHipErrors(hipMemcpy(hOutput, dOutput, NBytes, hipMemcpyDeviceToHost));
|
||||
|
||||
bool flag = true;
|
||||
// verify data
|
||||
|
||||
@@ -30,6 +30,7 @@ HIPCC=$(HIP_PATH)/bin/hipcc
|
||||
CLANG=$(HIP_PATH)/llvm/bin/clang
|
||||
LLVM_MC=$(HIP_PATH)/llvm/bin/llvm-mc
|
||||
CLANG_OFFLOAD_BUNDLER=$(HIP_PATH)/llvm/bin/clang-offload-bundler
|
||||
INCLUDES := -I../../common
|
||||
|
||||
SRCS=square.cpp
|
||||
|
||||
@@ -57,8 +58,8 @@ GPU_ARCH9=gfx1103
|
||||
all: src_to_asm asm_to_exec
|
||||
|
||||
src_to_asm:
|
||||
$(HIPCC) -c -S --cuda-host-only -target x86_64-linux-gnu -o $(SQ_HOST_ASM) $(SRCS)
|
||||
$(HIPCC) -c -S --cuda-device-only --offload-arch=$(GPU_ARCH1) --offload-arch=$(GPU_ARCH2) --offload-arch=$(GPU_ARCH3) --offload-arch=$(GPU_ARCH4) --offload-arch=$(GPU_ARCH5) --offload-arch=$(GPU_ARCH6) --offload-arch=$(GPU_ARCH7) --offload-arch=$(GPU_ARCH8) --offload-arch=$(GPU_ARCH9) $(SRCS)
|
||||
$(HIPCC) -c -S $(INCLUDES) --cuda-host-only -target x86_64-linux-gnu -o $(SQ_HOST_ASM) $(SRCS)
|
||||
$(HIPCC) -c -S $(INCLUDES) --cuda-device-only --offload-arch=$(GPU_ARCH1) --offload-arch=$(GPU_ARCH2) --offload-arch=$(GPU_ARCH3) --offload-arch=$(GPU_ARCH4) --offload-arch=$(GPU_ARCH5) --offload-arch=$(GPU_ARCH6) --offload-arch=$(GPU_ARCH7) --offload-arch=$(GPU_ARCH8) --offload-arch=$(GPU_ARCH9) $(SRCS)
|
||||
|
||||
# You may modify the .s assembly files before the next step
|
||||
# By default, their names will be:
|
||||
|
||||
@@ -19,15 +19,7 @@ THE SOFTWARE.
|
||||
|
||||
#include <stdio.h>
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
#define CHECK(cmd) \
|
||||
{\
|
||||
hipError_t error = cmd;\
|
||||
if (error != hipSuccess) { \
|
||||
fprintf(stderr, "error: '%s'(%d) at %s:%d\n", hipGetErrorString(error), error,__FILE__, __LINE__); \
|
||||
exit(EXIT_FAILURE);\
|
||||
}\
|
||||
}
|
||||
#include "hip_helper.h"
|
||||
|
||||
/* This kernel is a placeholder for the kernel in assembly generated by this
|
||||
* sample. It will be replaced by the kernel in assembly.
|
||||
@@ -55,14 +47,14 @@ int main(int argc, char *argv[])
|
||||
size_t Nbytes = N * sizeof(float);
|
||||
|
||||
hipDeviceProp_t props;
|
||||
CHECK(hipGetDeviceProperties(&props, 0/*deviceID*/));
|
||||
checkHipErrors(hipGetDeviceProperties(&props, 0/*deviceID*/));
|
||||
printf ("info: running on device %s\n", props.name);
|
||||
|
||||
printf ("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
|
||||
A_h = (float*)malloc(Nbytes);
|
||||
CHECK(A_h == 0 ? hipErrorMemoryAllocation : hipSuccess );
|
||||
checkHipErrors(A_h == 0 ? hipErrorMemoryAllocation : hipSuccess );
|
||||
C_h = (float*)malloc(Nbytes);
|
||||
CHECK(C_h == 0 ? hipErrorMemoryAllocation : hipSuccess );
|
||||
checkHipErrors(C_h == 0 ? hipErrorMemoryAllocation : hipSuccess );
|
||||
// Fill with Phi + i
|
||||
for (size_t i=0; i<N; i++)
|
||||
{
|
||||
@@ -70,12 +62,12 @@ int main(int argc, char *argv[])
|
||||
}
|
||||
|
||||
printf ("info: allocate device mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
|
||||
CHECK(hipMalloc(&A_d, Nbytes));
|
||||
CHECK(hipMalloc(&C_d, Nbytes));
|
||||
checkHipErrors(hipMalloc(&A_d, Nbytes));
|
||||
checkHipErrors(hipMalloc(&C_d, Nbytes));
|
||||
|
||||
|
||||
printf ("info: copy Host2Device\n");
|
||||
CHECK ( hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
|
||||
checkHipErrors ( hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
|
||||
|
||||
const unsigned blocks = 512;
|
||||
const unsigned threadsPerBlock = 256;
|
||||
@@ -84,12 +76,12 @@ int main(int argc, char *argv[])
|
||||
vector_square <<<blocks, threadsPerBlock>>> (C_d, A_d, N);
|
||||
|
||||
printf ("info: copy Device2Host\n");
|
||||
CHECK ( hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
checkHipErrors ( hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
printf ("info: check result\n");
|
||||
printf ("info: checkHipErrors result\n");
|
||||
for (size_t i=0; i<N; i++) {
|
||||
if (C_h[i] != A_h[i] * A_h[i]) {
|
||||
CHECK(hipErrorUnknown);
|
||||
checkHipErrors(hipErrorUnknown);
|
||||
}
|
||||
}
|
||||
printf ("PASSED!\n");
|
||||
|
||||
@@ -32,6 +32,7 @@ LLVM_MC=$(HIP_PATH)/llvm/bin/llvm-mc
|
||||
CLANG_OFFLOAD_BUNDLER=$(HIP_PATH)/llvm/bin/clang-offload-bundler
|
||||
LLVM_AS=$(HIP_PATH)/llvm/bin/llvm-as
|
||||
LLVM_DIS=$(HIP_PATH)/llvm/bin/llvm-dis
|
||||
INCLUDES := -I../../common
|
||||
|
||||
SRCS=square.cpp
|
||||
|
||||
@@ -60,8 +61,8 @@ GPU_ARCH9=gfx1103
|
||||
all: src_to_ir bc_to_ll ll_to_bc ir_to_exec
|
||||
|
||||
src_to_ir:
|
||||
$(HIPCC) -c -emit-llvm --cuda-host-only -target x86_64-linux-gnu -o $(SQ_HOST_BC) $(SRCS)
|
||||
$(HIPCC) -c -emit-llvm --cuda-device-only --offload-arch=$(GPU_ARCH1) --offload-arch=$(GPU_ARCH2) --offload-arch=$(GPU_ARCH3) --offload-arch=$(GPU_ARCH4) --offload-arch=$(GPU_ARCH5) --offload-arch=$(GPU_ARCH6) --offload-arch=$(GPU_ARCH7) --offload-arch=$(GPU_ARCH8) --offload-arch=$(GPU_ARCH9) $(SRCS)
|
||||
$(HIPCC) $(INCLUDES) -c -emit-llvm --cuda-host-only -target x86_64-linux-gnu -o $(SQ_HOST_BC) $(SRCS)
|
||||
$(HIPCC) $(INCLUDES) -c -emit-llvm --cuda-device-only --offload-arch=$(GPU_ARCH1) --offload-arch=$(GPU_ARCH2) --offload-arch=$(GPU_ARCH3) --offload-arch=$(GPU_ARCH4) --offload-arch=$(GPU_ARCH5) --offload-arch=$(GPU_ARCH6) --offload-arch=$(GPU_ARCH7) --offload-arch=$(GPU_ARCH8) --offload-arch=$(GPU_ARCH9) $(SRCS)
|
||||
|
||||
# By default, the LLVM IR Bitcode file names will be:
|
||||
# square-hip-amdgcn-amd-amdhsa-gfx900.bc
|
||||
|
||||
@@ -19,15 +19,7 @@ THE SOFTWARE.
|
||||
|
||||
#include <stdio.h>
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
#define CHECK(cmd) \
|
||||
{\
|
||||
hipError_t error = cmd;\
|
||||
if (error != hipSuccess) { \
|
||||
fprintf(stderr, "error: '%s'(%d) at %s:%d\n", hipGetErrorString(error), error,__FILE__, __LINE__); \
|
||||
exit(EXIT_FAILURE);\
|
||||
}\
|
||||
}
|
||||
#include "hip_helper.h"
|
||||
|
||||
/* This kernel is a placeholder for the kernel in LLVM IR generated by this
|
||||
* sample. It will be replaced by the kernel in LLVM IR.
|
||||
@@ -55,14 +47,14 @@ int main(int argc, char *argv[])
|
||||
size_t Nbytes = N * sizeof(float);
|
||||
|
||||
hipDeviceProp_t props;
|
||||
CHECK(hipGetDeviceProperties(&props, 0/*deviceID*/));
|
||||
checkHipErrors(hipGetDeviceProperties(&props, 0/*deviceID*/));
|
||||
printf ("info: running on device %s\n", props.name);
|
||||
|
||||
printf ("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
|
||||
A_h = (float*)malloc(Nbytes);
|
||||
CHECK(A_h == 0 ? hipErrorMemoryAllocation : hipSuccess );
|
||||
checkHipErrors(A_h == 0 ? hipErrorMemoryAllocation : hipSuccess );
|
||||
C_h = (float*)malloc(Nbytes);
|
||||
CHECK(C_h == 0 ? hipErrorMemoryAllocation : hipSuccess );
|
||||
checkHipErrors(C_h == 0 ? hipErrorMemoryAllocation : hipSuccess );
|
||||
// Fill with Phi + i
|
||||
for (size_t i=0; i<N; i++)
|
||||
{
|
||||
@@ -70,12 +62,12 @@ int main(int argc, char *argv[])
|
||||
}
|
||||
|
||||
printf ("info: allocate device mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
|
||||
CHECK(hipMalloc(&A_d, Nbytes));
|
||||
CHECK(hipMalloc(&C_d, Nbytes));
|
||||
checkHipErrors(hipMalloc(&A_d, Nbytes));
|
||||
checkHipErrors(hipMalloc(&C_d, Nbytes));
|
||||
|
||||
|
||||
printf ("info: copy Host2Device\n");
|
||||
CHECK ( hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
|
||||
checkHipErrors ( hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
|
||||
|
||||
const unsigned blocks = 512;
|
||||
const unsigned threadsPerBlock = 256;
|
||||
@@ -84,12 +76,12 @@ int main(int argc, char *argv[])
|
||||
vector_square <<<blocks, threadsPerBlock>>> (C_d, A_d, N);
|
||||
|
||||
printf ("info: copy Device2Host\n");
|
||||
CHECK ( hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
checkHipErrors ( hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
printf ("info: check result\n");
|
||||
printf ("info: checkHipErrors result\n");
|
||||
for (size_t i=0; i<N; i++) {
|
||||
if (C_h[i] != A_h[i] * A_h[i]) {
|
||||
CHECK(hipErrorUnknown);
|
||||
checkHipErrors(hipErrorUnknown);
|
||||
}
|
||||
}
|
||||
printf ("PASSED!\n");
|
||||
|
||||
@@ -22,11 +22,15 @@ project(cmake_hip_device_test)
|
||||
|
||||
cmake_minimum_required(VERSION 3.10.2)
|
||||
|
||||
include_directories(../../common)
|
||||
|
||||
# Find hip
|
||||
find_package(hip REQUIRED)
|
||||
|
||||
# Create the excutable
|
||||
add_executable(test_cpp square.cpp)
|
||||
|
||||
target_include_directories(test_cpp PRIVATE ../../common)
|
||||
|
||||
# Link with HIP
|
||||
target_link_libraries(test_cpp hip::device)
|
||||
@@ -22,16 +22,7 @@ THE SOFTWARE.
|
||||
|
||||
#include <stdio.h>
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
#define CHECK(cmd) \
|
||||
{\
|
||||
hipError_t error = cmd;\
|
||||
if (error != hipSuccess) { \
|
||||
fprintf(stderr, "error: '%s'(%d) at %s:%d\n", hipGetErrorString(error), error,__FILE__, __LINE__); \
|
||||
exit(EXIT_FAILURE);\
|
||||
}\
|
||||
}
|
||||
|
||||
#include "hip_helper.h"
|
||||
|
||||
/*
|
||||
* Square each element in the array A and write to array C.
|
||||
@@ -57,14 +48,14 @@ int main(int argc, char *argv[])
|
||||
size_t Nbytes = N * sizeof(float);
|
||||
|
||||
hipDeviceProp_t props;
|
||||
CHECK(hipGetDeviceProperties(&props, 0/*deviceID*/));
|
||||
checkHipErrors(hipGetDeviceProperties(&props, 0/*deviceID*/));
|
||||
printf ("info: running on device %s\n", props.name);
|
||||
|
||||
printf ("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
|
||||
A_h = (float*)malloc(Nbytes);
|
||||
CHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess );
|
||||
checkHipErrors(A_h == 0 ? hipErrorOutOfMemory : hipSuccess );
|
||||
C_h = (float*)malloc(Nbytes);
|
||||
CHECK(C_h == 0 ? hipErrorOutOfMemory : hipSuccess );
|
||||
checkHipErrors(C_h == 0 ? hipErrorOutOfMemory : hipSuccess );
|
||||
// Fill with Phi + i
|
||||
for (size_t i=0; i<N; i++)
|
||||
{
|
||||
@@ -72,12 +63,12 @@ int main(int argc, char *argv[])
|
||||
}
|
||||
|
||||
printf ("info: allocate device mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
|
||||
CHECK(hipMalloc(&A_d, Nbytes));
|
||||
CHECK(hipMalloc(&C_d, Nbytes));
|
||||
checkHipErrors(hipMalloc(&A_d, Nbytes));
|
||||
checkHipErrors(hipMalloc(&C_d, Nbytes));
|
||||
|
||||
|
||||
printf ("info: copy Host2Device\n");
|
||||
CHECK ( hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
|
||||
checkHipErrors ( hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
|
||||
|
||||
const unsigned blocks = 512;
|
||||
const unsigned threadsPerBlock = 256;
|
||||
@@ -86,12 +77,12 @@ int main(int argc, char *argv[])
|
||||
hipLaunchKernelGGL(vector_square, dim3(blocks), dim3(threadsPerBlock), 0, 0, C_d, A_d, N);
|
||||
|
||||
printf ("info: copy Device2Host\n");
|
||||
CHECK ( hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
checkHipErrors ( hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
printf ("info: check result\n");
|
||||
printf ("info: checkHipErrors result\n");
|
||||
for (size_t i=0; i<N; i++) {
|
||||
if (C_h[i] != A_h[i] * A_h[i]) {
|
||||
CHECK(hipErrorUnknown);
|
||||
checkHipErrors(hipErrorUnknown);
|
||||
}
|
||||
}
|
||||
printf ("PASSED!\n");
|
||||
|
||||
@@ -10,5 +10,8 @@ add_executable(test_fortran TestFortran.F90)
|
||||
add_executable(test_cpp MatrixTranspose.cpp)
|
||||
|
||||
target_link_libraries(test_cpp PUBLIC hip::device)
|
||||
|
||||
target_include_directories(test_cpp PRIVATE ../../common)
|
||||
|
||||
# Assuming to build a C/C++-to-Fortran library binding.
|
||||
target_link_libraries(test_fortran PUBLIC hip::device)
|
||||
|
||||
@@ -24,7 +24,7 @@ THE SOFTWARE.
|
||||
|
||||
// hip header file
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
#include "hip_helper.h"
|
||||
|
||||
#define WIDTH 1024
|
||||
|
||||
@@ -61,7 +61,7 @@ int main() {
|
||||
float* gpuTransposeMatrix;
|
||||
|
||||
hipDeviceProp_t devProp;
|
||||
hipGetDeviceProperties(&devProp, 0);
|
||||
checkHipErrors(hipGetDeviceProperties(&devProp, 0));
|
||||
|
||||
std::cout << "Device name " << devProp.name << std::endl;
|
||||
|
||||
@@ -78,11 +78,11 @@ int main() {
|
||||
}
|
||||
|
||||
// allocate the memory on the device side
|
||||
hipMalloc((void**)&gpuMatrix, NUM * sizeof(float));
|
||||
hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float));
|
||||
checkHipErrors(hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)));
|
||||
checkHipErrors(hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)));
|
||||
|
||||
// Memory transfer from host to device
|
||||
hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice);
|
||||
checkHipErrors(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice));
|
||||
|
||||
// Lauching kernel from host
|
||||
hipLaunchKernelGGL(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y),
|
||||
@@ -90,7 +90,7 @@ int main() {
|
||||
gpuMatrix, WIDTH);
|
||||
|
||||
// Memory transfer from device to host
|
||||
hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost);
|
||||
checkHipErrors(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost));
|
||||
|
||||
// CPU MatrixTranspose computation
|
||||
matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
|
||||
@@ -110,8 +110,8 @@ int main() {
|
||||
}
|
||||
|
||||
// free the resources on device side
|
||||
hipFree(gpuMatrix);
|
||||
hipFree(gpuTransposeMatrix);
|
||||
checkHipErrors(hipFree(gpuMatrix));
|
||||
checkHipErrors(hipFree(gpuTransposeMatrix));
|
||||
|
||||
// free the resources on host side
|
||||
free(Matrix);
|
||||
|
||||
@@ -40,5 +40,7 @@ set(CMAKE_BUILD_TYPE Release)
|
||||
# Create the excutable
|
||||
add_executable(hipEvent hipEvent.cpp)
|
||||
|
||||
target_include_directories(hipEvent PRIVATE ../../common)
|
||||
|
||||
# Link with HIP
|
||||
target_link_libraries(hipEvent hip::host)
|
||||
|
||||
@@ -30,6 +30,7 @@ TARGET=hcc
|
||||
|
||||
SOURCES = hipEvent.cpp
|
||||
OBJECTS = $(SOURCES:.cpp=.o)
|
||||
INCLUDES := -I../../common
|
||||
|
||||
EXECUTABLE=./hipEvent
|
||||
|
||||
@@ -38,7 +39,7 @@ EXECUTABLE=./hipEvent
|
||||
|
||||
all: $(EXECUTABLE) test
|
||||
|
||||
CXXFLAGS =-g
|
||||
CXXFLAGS =-g $(INCLUDES)
|
||||
CXX=$(HIPCC)
|
||||
|
||||
|
||||
|
||||
@@ -24,6 +24,7 @@ THE SOFTWARE.
|
||||
|
||||
// hip header file
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip_helper.h"
|
||||
|
||||
#define WIDTH 1024
|
||||
|
||||
@@ -59,13 +60,13 @@ int main() {
|
||||
float* gpuTransposeMatrix;
|
||||
|
||||
hipDeviceProp_t devProp;
|
||||
hipGetDeviceProperties(&devProp, 0);
|
||||
checkHipErrors(hipGetDeviceProperties(&devProp, 0));
|
||||
|
||||
std::cout << "Device name " << devProp.name << std::endl;
|
||||
|
||||
hipEvent_t start, stop;
|
||||
hipEventCreate(&start);
|
||||
hipEventCreate(&stop);
|
||||
checkHipErrors(hipEventCreate(&start));
|
||||
checkHipErrors(hipEventCreate(&stop));
|
||||
float eventMs = 1.0f;
|
||||
|
||||
int i;
|
||||
@@ -81,25 +82,25 @@ int main() {
|
||||
}
|
||||
|
||||
// allocate the memory on the device side
|
||||
hipMalloc((void**)&gpuMatrix, NUM * sizeof(float));
|
||||
hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float));
|
||||
checkHipErrors(hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)));
|
||||
checkHipErrors(hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)));
|
||||
|
||||
// Record the start event
|
||||
hipEventRecord(start, NULL);
|
||||
checkHipErrors(hipEventRecord(start, NULL));
|
||||
|
||||
// Memory transfer from host to device
|
||||
hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice);
|
||||
checkHipErrors(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice));
|
||||
|
||||
// Record the stop event
|
||||
hipEventRecord(stop, NULL);
|
||||
hipEventSynchronize(stop);
|
||||
checkHipErrors(hipEventRecord(stop, NULL));
|
||||
checkHipErrors(hipEventSynchronize(stop));
|
||||
|
||||
hipEventElapsedTime(&eventMs, start, stop);
|
||||
checkHipErrors(hipEventElapsedTime(&eventMs, start, stop));
|
||||
|
||||
printf("hipMemcpyHostToDevice time taken = %6.3fms\n", eventMs);
|
||||
|
||||
// Record the start event
|
||||
hipEventRecord(start, NULL);
|
||||
checkHipErrors(hipEventRecord(start, NULL));
|
||||
|
||||
// Lauching kernel from host
|
||||
hipLaunchKernelGGL(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y),
|
||||
@@ -107,24 +108,24 @@ int main() {
|
||||
gpuMatrix, WIDTH);
|
||||
|
||||
// Record the stop event
|
||||
hipEventRecord(stop, NULL);
|
||||
hipEventSynchronize(stop);
|
||||
checkHipErrors(hipEventRecord(stop, NULL));
|
||||
checkHipErrors(hipEventSynchronize(stop));
|
||||
|
||||
hipEventElapsedTime(&eventMs, start, stop);
|
||||
checkHipErrors(hipEventElapsedTime(&eventMs, start, stop));
|
||||
|
||||
printf("kernel Execution time = %6.3fms\n", eventMs);
|
||||
|
||||
// Record the start event
|
||||
hipEventRecord(start, NULL);
|
||||
checkHipErrors(hipEventRecord(start, NULL));
|
||||
|
||||
// Memory transfer from device to host
|
||||
hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost);
|
||||
checkHipErrors(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost));
|
||||
|
||||
// Record the stop event
|
||||
hipEventRecord(stop, NULL);
|
||||
hipEventSynchronize(stop);
|
||||
checkHipErrors(hipEventRecord(stop, NULL));
|
||||
checkHipErrors(hipEventSynchronize(stop));
|
||||
|
||||
hipEventElapsedTime(&eventMs, start, stop);
|
||||
checkHipErrors(hipEventElapsedTime(&eventMs, start, stop));
|
||||
|
||||
printf("hipMemcpyDeviceToHost time taken = %6.3fms\n", eventMs);
|
||||
|
||||
@@ -146,8 +147,8 @@ int main() {
|
||||
}
|
||||
|
||||
// free the resources on device side
|
||||
hipFree(gpuMatrix);
|
||||
hipFree(gpuTransposeMatrix);
|
||||
checkHipErrors(hipFree(gpuMatrix));
|
||||
checkHipErrors(hipFree(gpuTransposeMatrix));
|
||||
|
||||
// free the resources on host side
|
||||
free(Matrix);
|
||||
|
||||
@@ -30,5 +30,7 @@ find_package(hip REQUIRED)
|
||||
# Create the excutable
|
||||
add_executable(square square.cpp)
|
||||
|
||||
target_include_directories(square PRIVATE ../../common)
|
||||
|
||||
# Link with HIP
|
||||
target_link_libraries(square hip::device)
|
||||
|
||||
@@ -22,16 +22,7 @@ THE SOFTWARE.
|
||||
|
||||
#include <stdio.h>
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
#define CHECK(cmd) \
|
||||
{\
|
||||
hipError_t error = cmd;\
|
||||
if (error != hipSuccess) { \
|
||||
fprintf(stderr, "error: '%s'(%d) at %s:%d\n", hipGetErrorString(error), error,__FILE__, __LINE__); \
|
||||
exit(EXIT_FAILURE);\
|
||||
}\
|
||||
}
|
||||
|
||||
#include "hip_helper.h"
|
||||
|
||||
/*
|
||||
* Square each element in the array A and write to array C.
|
||||
@@ -57,14 +48,14 @@ int main(int argc, char *argv[])
|
||||
size_t Nbytes = N * sizeof(float);
|
||||
|
||||
hipDeviceProp_t props;
|
||||
CHECK(hipGetDeviceProperties(&props, 0/*deviceID*/));
|
||||
checkHipErrors(hipGetDeviceProperties(&props, 0/*deviceID*/));
|
||||
printf ("info: running on device %s\n", props.name);
|
||||
|
||||
printf ("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
|
||||
A_h = (float*)malloc(Nbytes);
|
||||
CHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess );
|
||||
checkHipErrors(A_h == 0 ? hipErrorOutOfMemory : hipSuccess );
|
||||
C_h = (float*)malloc(Nbytes);
|
||||
CHECK(C_h == 0 ? hipErrorOutOfMemory : hipSuccess );
|
||||
checkHipErrors(C_h == 0 ? hipErrorOutOfMemory : hipSuccess );
|
||||
// Fill with Phi + i
|
||||
for (size_t i=0; i<N; i++)
|
||||
{
|
||||
@@ -72,12 +63,12 @@ int main(int argc, char *argv[])
|
||||
}
|
||||
|
||||
printf ("info: allocate device mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
|
||||
CHECK(hipMalloc(&A_d, Nbytes));
|
||||
CHECK(hipMalloc(&C_d, Nbytes));
|
||||
checkHipErrors(hipMalloc(&A_d, Nbytes));
|
||||
checkHipErrors(hipMalloc(&C_d, Nbytes));
|
||||
|
||||
|
||||
printf ("info: copy Host2Device\n");
|
||||
CHECK ( hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
|
||||
checkHipErrors ( hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
|
||||
|
||||
const unsigned blocks = 512;
|
||||
const unsigned threadsPerBlock = 256;
|
||||
@@ -86,12 +77,12 @@ int main(int argc, char *argv[])
|
||||
hipLaunchKernelGGL(vector_square, dim3(blocks), dim3(threadsPerBlock), 0, 0, C_d, A_d, N);
|
||||
|
||||
printf ("info: copy Device2Host\n");
|
||||
CHECK ( hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
checkHipErrors ( hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
printf ("info: check result\n");
|
||||
for (size_t i=0; i<N; i++) {
|
||||
if (C_h[i] != A_h[i] * A_h[i]) {
|
||||
CHECK(hipErrorUnknown);
|
||||
checkHipErrors(hipErrorUnknown);
|
||||
}
|
||||
}
|
||||
printf ("PASSED!\n");
|
||||
|
||||
@@ -25,3 +25,5 @@ project(cmake_hip_lang_support VERSION 1.0
|
||||
LANGUAGES HIP)
|
||||
# Create the executable
|
||||
add_executable(square square.hip)
|
||||
|
||||
target_include_directories(square PRIVATE ../../common)
|
||||
|
||||
@@ -22,16 +22,7 @@ THE SOFTWARE.
|
||||
|
||||
#include <stdio.h>
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
#define CHECK(cmd) \
|
||||
{\
|
||||
hipError_t error = cmd;\
|
||||
if (error != hipSuccess) { \
|
||||
fprintf(stderr, "error: '%s'(%d) at %s:%d\n", hipGetErrorString(error), error,__FILE__, __LINE__); \
|
||||
exit(EXIT_FAILURE);\
|
||||
}\
|
||||
}
|
||||
|
||||
#include "hip_helper.h"
|
||||
|
||||
/*
|
||||
* Square each element in the array A and write to array C.
|
||||
@@ -57,14 +48,14 @@ int main(int argc, char *argv[])
|
||||
size_t Nbytes = N * sizeof(float);
|
||||
|
||||
hipDeviceProp_t props;
|
||||
CHECK(hipGetDeviceProperties(&props, 0/*deviceID*/));
|
||||
checkHipErrors(hipGetDeviceProperties(&props, 0/*deviceID*/));
|
||||
printf ("info: running on device %s\n", props.name);
|
||||
|
||||
printf ("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
|
||||
A_h = (float*)malloc(Nbytes);
|
||||
CHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess );
|
||||
checkHipErrors(A_h == 0 ? hipErrorOutOfMemory : hipSuccess );
|
||||
C_h = (float*)malloc(Nbytes);
|
||||
CHECK(C_h == 0 ? hipErrorOutOfMemory : hipSuccess );
|
||||
checkHipErrors(C_h == 0 ? hipErrorOutOfMemory : hipSuccess );
|
||||
// Fill with Phi + i
|
||||
for (size_t i=0; i<N; i++)
|
||||
{
|
||||
@@ -72,12 +63,12 @@ int main(int argc, char *argv[])
|
||||
}
|
||||
|
||||
printf ("info: allocate device mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
|
||||
CHECK(hipMalloc(&A_d, Nbytes));
|
||||
CHECK(hipMalloc(&C_d, Nbytes));
|
||||
checkHipErrors(hipMalloc(&A_d, Nbytes));
|
||||
checkHipErrors(hipMalloc(&C_d, Nbytes));
|
||||
|
||||
|
||||
printf ("info: copy Host2Device\n");
|
||||
CHECK ( hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
|
||||
checkHipErrors ( hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
|
||||
|
||||
const unsigned blocks = 512;
|
||||
const unsigned threadsPerBlock = 256;
|
||||
@@ -86,12 +77,12 @@ int main(int argc, char *argv[])
|
||||
hipLaunchKernelGGL(vector_square, dim3(blocks), dim3(threadsPerBlock), 0, 0, C_d, A_d, N);
|
||||
|
||||
printf ("info: copy Device2Host\n");
|
||||
CHECK ( hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
checkHipErrors ( hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
printf ("info: check result\n");
|
||||
for (size_t i=0; i<N; i++) {
|
||||
if (C_h[i] != A_h[i] * A_h[i]) {
|
||||
CHECK(hipErrorUnknown);
|
||||
checkHipErrors(hipErrorUnknown);
|
||||
}
|
||||
}
|
||||
printf ("PASSED!\n");
|
||||
|
||||
@@ -34,3 +34,5 @@ add_executable(test saxpy.cpp)
|
||||
target_link_libraries(test hiprtc::hiprtc)
|
||||
# Link with HIP
|
||||
target_link_libraries(test hip::device)
|
||||
|
||||
target_include_directories(test PRIVATE ../../common)
|
||||
|
||||
@@ -22,6 +22,7 @@ THE SOFTWARE.
|
||||
|
||||
#include <hip/hiprtc.h>
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <hip_helper.h>
|
||||
|
||||
#include <cassert>
|
||||
#include <cstddef>
|
||||
@@ -69,7 +70,7 @@ int main()
|
||||
|
||||
hipDeviceProp_t props;
|
||||
int device = 0;
|
||||
hipGetDeviceProperties(&props, device);
|
||||
checkHipErrors(hipGetDeviceProperties(&props, device));
|
||||
|
||||
const char* options[] = {};
|
||||
|
||||
@@ -100,8 +101,8 @@ int main()
|
||||
hipModule_t module;
|
||||
hipFunction_t kernel;
|
||||
|
||||
hipModuleLoadData(&module, code.data());
|
||||
hipModuleGetFunction(&kernel, module, "saxpy");
|
||||
checkHipErrors(hipModuleLoadData(&module, code.data()));
|
||||
checkHipErrors(hipModuleGetFunction(&kernel, module, "saxpy"));
|
||||
|
||||
size_t n = NUM_THREADS * NUM_BLOCKS;
|
||||
size_t bufferSize = n * sizeof(float);
|
||||
@@ -117,11 +118,11 @@ int main()
|
||||
}
|
||||
|
||||
hipDeviceptr_t dX, dY, dOut;
|
||||
hipMalloc((void **)&dX, bufferSize);
|
||||
hipMalloc((void **)&dY, bufferSize);
|
||||
hipMalloc((void **)&dOut, bufferSize);
|
||||
hipMemcpyHtoD(dX, hX.get(), bufferSize);
|
||||
hipMemcpyHtoD(dY, hY.get(), bufferSize);
|
||||
checkHipErrors(hipMalloc((void **)&dX, bufferSize));
|
||||
checkHipErrors(hipMalloc((void **)&dY, bufferSize));
|
||||
checkHipErrors(hipMalloc((void **)&dOut, bufferSize));
|
||||
checkHipErrors(hipMemcpyHtoD(dX, hX.get(), bufferSize));
|
||||
checkHipErrors(hipMemcpyHtoD(dY, hY.get(), bufferSize));
|
||||
|
||||
struct {
|
||||
float a_;
|
||||
@@ -136,9 +137,9 @@ int main()
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
|
||||
HIP_LAUNCH_PARAM_END};
|
||||
|
||||
hipModuleLaunchKernel(kernel, NUM_BLOCKS, 1, 1, NUM_THREADS, 1, 1,
|
||||
0, nullptr, nullptr, config);
|
||||
hipMemcpyDtoH(hOut.get(), dOut, bufferSize);
|
||||
checkHipErrors(hipModuleLaunchKernel(kernel, NUM_BLOCKS, 1, 1, NUM_THREADS, 1, 1,
|
||||
0, nullptr, nullptr, config));
|
||||
checkHipErrors(hipMemcpyDtoH(hOut.get(), dOut, bufferSize));
|
||||
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
if (fabs(a * hX[i] + hY[i] - hOut[i]) > fabs(hOut[i])* 1e-6) {
|
||||
@@ -146,11 +147,11 @@ int main()
|
||||
}
|
||||
}
|
||||
|
||||
hipFree((void *)dX);
|
||||
hipFree((void *)dY);
|
||||
hipFree((void *)dOut);
|
||||
checkHipErrors(hipFree((void *)dX));
|
||||
checkHipErrors(hipFree((void *)dY));
|
||||
checkHipErrors(hipFree((void *)dOut));
|
||||
|
||||
hipModuleUnload(module);
|
||||
checkHipErrors(hipModuleUnload(module));
|
||||
|
||||
cout << "SAXPY test completed" << endl;
|
||||
}
|
||||
|
||||
@@ -40,5 +40,7 @@ set(CMAKE_BUILD_TYPE Release)
|
||||
# Create the excutable
|
||||
add_executable(sharedMemory sharedMemory.cpp)
|
||||
|
||||
target_include_directories(sharedMemory PRIVATE ../../common)
|
||||
|
||||
# Link with HIP
|
||||
target_link_libraries(sharedMemory hip::host)
|
||||
|
||||
@@ -32,6 +32,7 @@ TARGET=hcc
|
||||
|
||||
SOURCES = sharedMemory.cpp
|
||||
OBJECTS = $(SOURCES:.cpp=.o)
|
||||
INCLUDES := -I../../common
|
||||
|
||||
EXECUTABLE=./sharedMemory
|
||||
|
||||
@@ -40,7 +41,7 @@ EXECUTABLE=./sharedMemory
|
||||
|
||||
all: $(EXECUTABLE) test
|
||||
|
||||
CXXFLAGS =-g
|
||||
CXXFLAGS =-g $(INCLUDES)
|
||||
CXX=$(HIPCC)
|
||||
|
||||
|
||||
|
||||
@@ -24,7 +24,7 @@ THE SOFTWARE.
|
||||
|
||||
// hip header file
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
#include "hip_helper.h"
|
||||
|
||||
#define WIDTH 64
|
||||
|
||||
@@ -66,7 +66,7 @@ int main() {
|
||||
float* gpuTransposeMatrix;
|
||||
|
||||
hipDeviceProp_t devProp;
|
||||
hipGetDeviceProperties(&devProp, 0);
|
||||
checkHipErrors(hipGetDeviceProperties(&devProp, 0));
|
||||
|
||||
std::cout << "Device name " << devProp.name << std::endl;
|
||||
|
||||
@@ -83,11 +83,11 @@ int main() {
|
||||
}
|
||||
|
||||
// allocate the memory on the device side
|
||||
hipMalloc((void**)&gpuMatrix, NUM * sizeof(float));
|
||||
hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float));
|
||||
checkHipErrors(hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)));
|
||||
checkHipErrors(hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)));
|
||||
|
||||
// Memory transfer from host to device
|
||||
hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice);
|
||||
checkHipErrors(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice));
|
||||
|
||||
// Lauching kernel from host
|
||||
hipLaunchKernelGGL(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y),
|
||||
@@ -95,7 +95,7 @@ int main() {
|
||||
gpuMatrix, WIDTH);
|
||||
|
||||
// Memory transfer from device to host
|
||||
hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost);
|
||||
checkHipErrors(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost));
|
||||
|
||||
// CPU MatrixTranspose computation
|
||||
matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
|
||||
@@ -116,8 +116,8 @@ int main() {
|
||||
}
|
||||
|
||||
// free the resources on device side
|
||||
hipFree(gpuMatrix);
|
||||
hipFree(gpuTransposeMatrix);
|
||||
checkHipErrors(hipFree(gpuMatrix));
|
||||
checkHipErrors(hipFree(gpuTransposeMatrix));
|
||||
|
||||
// free the resources on host side
|
||||
free(Matrix);
|
||||
|
||||
@@ -40,5 +40,7 @@ set(CMAKE_BUILD_TYPE Release)
|
||||
# Create the excutable
|
||||
add_executable(shfl shfl.cpp)
|
||||
|
||||
target_include_directories(shfl PRIVATE ../../common)
|
||||
|
||||
# Link with HIP
|
||||
target_link_libraries(shfl hip::host)
|
||||
|
||||
@@ -36,6 +36,7 @@ TARGET=hcc
|
||||
|
||||
SOURCES = shfl.cpp
|
||||
OBJECTS = $(SOURCES:.cpp=.o)
|
||||
INCLUDES := -I../../common
|
||||
|
||||
EXECUTABLE=./shfl
|
||||
|
||||
@@ -44,7 +45,7 @@ EXECUTABLE=./shfl
|
||||
|
||||
all: $(EXECUTABLE) test
|
||||
|
||||
CXXFLAGS =-g
|
||||
CXXFLAGS =-g $(INCLUDES)
|
||||
CXX=$(HIPCC)
|
||||
|
||||
|
||||
|
||||
@@ -24,7 +24,7 @@ THE SOFTWARE.
|
||||
|
||||
// hip header file
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
#include "hip_helper.h"
|
||||
|
||||
#define WIDTH 4
|
||||
|
||||
@@ -63,7 +63,7 @@ int main() {
|
||||
float* gpuTransposeMatrix;
|
||||
|
||||
hipDeviceProp_t devProp;
|
||||
hipGetDeviceProperties(&devProp, 0);
|
||||
checkHipErrors(hipGetDeviceProperties(&devProp, 0));
|
||||
|
||||
std::cout << "Device name " << devProp.name << std::endl;
|
||||
|
||||
@@ -80,18 +80,18 @@ int main() {
|
||||
}
|
||||
|
||||
// allocate the memory on the device side
|
||||
hipMalloc((void**)&gpuMatrix, NUM * sizeof(float));
|
||||
hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float));
|
||||
checkHipErrors(hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)));
|
||||
checkHipErrors(hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)));
|
||||
|
||||
// Memory transfer from host to device
|
||||
hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice);
|
||||
checkHipErrors(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice));
|
||||
|
||||
// Lauching kernel from host
|
||||
hipLaunchKernelGGL(matrixTranspose, dim3(1), dim3(THREADS_PER_BLOCK_X * THREADS_PER_BLOCK_Y), 0, 0,
|
||||
gpuTransposeMatrix, gpuMatrix, WIDTH);
|
||||
|
||||
// Memory transfer from device to host
|
||||
hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost);
|
||||
checkHipErrors(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost));
|
||||
|
||||
// CPU MatrixTranspose computation
|
||||
matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
|
||||
@@ -112,8 +112,8 @@ int main() {
|
||||
}
|
||||
|
||||
// free the resources on device side
|
||||
hipFree(gpuMatrix);
|
||||
hipFree(gpuTransposeMatrix);
|
||||
checkHipErrors(hipFree(gpuMatrix));
|
||||
checkHipErrors(hipFree(gpuTransposeMatrix));
|
||||
|
||||
// free the resources on host side
|
||||
free(Matrix);
|
||||
|
||||
@@ -24,6 +24,7 @@ THE SOFTWARE.
|
||||
|
||||
// hip header file
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip_helper.h"
|
||||
|
||||
|
||||
#define WIDTH 4
|
||||
@@ -61,7 +62,7 @@ int main() {
|
||||
float* gpuTransposeMatrix;
|
||||
|
||||
hipDeviceProp_t devProp;
|
||||
hipGetDeviceProperties(&devProp, 0);
|
||||
checkHipErrors(hipGetDeviceProperties(&devProp, 0));
|
||||
|
||||
std::cout << "Device name " << devProp.name << std::endl;
|
||||
|
||||
@@ -78,18 +79,18 @@ int main() {
|
||||
}
|
||||
|
||||
// allocate the memory on the device side
|
||||
hipMalloc((void**)&gpuMatrix, NUM * sizeof(float));
|
||||
hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float));
|
||||
checkHipErrors(hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)));
|
||||
checkHipErrors(hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)));
|
||||
|
||||
// Memory transfer from host to device
|
||||
hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice);
|
||||
checkHipErrors(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice));
|
||||
|
||||
// Lauching kernel from host
|
||||
hipLaunchKernelGGL(matrixTranspose, dim3(1), dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0,
|
||||
gpuTransposeMatrix, gpuMatrix, WIDTH);
|
||||
|
||||
// Memory transfer from device to host
|
||||
hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost);
|
||||
checkHipErrors(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost));
|
||||
|
||||
// CPU MatrixTranspose computation
|
||||
matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
|
||||
@@ -110,8 +111,8 @@ int main() {
|
||||
}
|
||||
|
||||
// free the resources on device side
|
||||
hipFree(gpuMatrix);
|
||||
hipFree(gpuTransposeMatrix);
|
||||
checkHipErrors(hipFree(gpuMatrix));
|
||||
checkHipErrors(hipFree(gpuTransposeMatrix));
|
||||
|
||||
// free the resources on host side
|
||||
free(Matrix);
|
||||
|
||||
@@ -39,5 +39,7 @@ set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE})
|
||||
# Create the excutable
|
||||
add_executable(2dshfl 2dshfl.cpp)
|
||||
|
||||
target_include_directories(2dshfl PRIVATE ../../common)
|
||||
|
||||
# Link with HIP
|
||||
target_link_libraries(2dshfl hip::host)
|
||||
|
||||
@@ -36,6 +36,7 @@ TARGET=hcc
|
||||
|
||||
SOURCES = 2dshfl.cpp
|
||||
OBJECTS = $(SOURCES:.cpp=.o)
|
||||
INCLUDES := -I../../common
|
||||
|
||||
EXECUTABLE=./2dshfl
|
||||
|
||||
@@ -44,7 +45,7 @@ EXECUTABLE=./2dshfl
|
||||
|
||||
all: $(EXECUTABLE) test
|
||||
|
||||
CXXFLAGS =-g
|
||||
CXXFLAGS =-g $(INCLUDES)
|
||||
CXX=$(HIPCC)
|
||||
|
||||
|
||||
|
||||
@@ -22,6 +22,8 @@ project(dynamic_shared)
|
||||
|
||||
cmake_minimum_required(VERSION 3.10)
|
||||
|
||||
include_directories(../../common)
|
||||
|
||||
if (NOT DEFINED ROCM_PATH )
|
||||
set ( ROCM_PATH "/opt/rocm" CACHE STRING "Default ROCM installation directory." )
|
||||
endif ()
|
||||
@@ -39,5 +41,7 @@ set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE})
|
||||
# Create the excutable
|
||||
add_executable(dynamic_shared dynamic_shared.cpp)
|
||||
|
||||
target_include_directories(dynamic_shared PRIVATE ../../common)
|
||||
|
||||
# Link with HIP
|
||||
target_link_libraries(dynamic_shared hip::host)
|
||||
|
||||
@@ -32,6 +32,7 @@ TARGET=hcc
|
||||
|
||||
SOURCES = dynamic_shared.cpp
|
||||
OBJECTS = $(SOURCES:.cpp=.o)
|
||||
INCLUDES := -I../../common
|
||||
|
||||
EXECUTABLE=./dynamic_shared
|
||||
|
||||
@@ -40,7 +41,7 @@ EXECUTABLE=./dynamic_shared
|
||||
|
||||
all: $(EXECUTABLE) test
|
||||
|
||||
CXXFLAGS =-g
|
||||
CXXFLAGS =-g $(INCLUDES)
|
||||
CXX=$(HIPCC)
|
||||
|
||||
|
||||
|
||||
@@ -24,6 +24,7 @@ THE SOFTWARE.
|
||||
|
||||
// hip header file
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip_helper.h"
|
||||
|
||||
#define WIDTH 16
|
||||
|
||||
@@ -65,7 +66,7 @@ int main() {
|
||||
float* gpuTransposeMatrix;
|
||||
|
||||
hipDeviceProp_t devProp;
|
||||
hipGetDeviceProperties(&devProp, 0);
|
||||
checkHipErrors(hipGetDeviceProperties(&devProp, 0));
|
||||
|
||||
std::cout << "Device name " << devProp.name << std::endl;
|
||||
|
||||
@@ -82,11 +83,11 @@ int main() {
|
||||
}
|
||||
|
||||
// allocate the memory on the device side
|
||||
hipMalloc((void**)&gpuMatrix, NUM * sizeof(float));
|
||||
hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float));
|
||||
checkHipErrors(hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)));
|
||||
checkHipErrors(hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)));
|
||||
|
||||
// Memory transfer from host to device
|
||||
hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice);
|
||||
checkHipErrors(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice));
|
||||
|
||||
// Lauching kernel from host
|
||||
hipLaunchKernelGGL(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y),
|
||||
@@ -94,7 +95,7 @@ int main() {
|
||||
0, gpuTransposeMatrix, gpuMatrix, WIDTH);
|
||||
|
||||
// Memory transfer from device to host
|
||||
hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost);
|
||||
checkHipErrors(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost));
|
||||
|
||||
// CPU MatrixTranspose computation
|
||||
matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
|
||||
@@ -115,8 +116,8 @@ int main() {
|
||||
}
|
||||
|
||||
// free the resources on device side
|
||||
hipFree(gpuMatrix);
|
||||
hipFree(gpuTransposeMatrix);
|
||||
checkHipErrors(hipFree(gpuMatrix));
|
||||
checkHipErrors(hipFree(gpuTransposeMatrix));
|
||||
|
||||
// free the resources on host side
|
||||
free(Matrix);
|
||||
|
||||
@@ -39,5 +39,7 @@ set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE})
|
||||
# Create the excutable
|
||||
add_executable(stream stream.cpp)
|
||||
|
||||
target_include_directories(stream PRIVATE ../../common)
|
||||
|
||||
# Link with HIP
|
||||
target_link_libraries(stream hip::host)
|
||||
|
||||
@@ -32,6 +32,7 @@ TARGET=hcc
|
||||
|
||||
SOURCES = stream.cpp
|
||||
OBJECTS = $(SOURCES:.cpp=.o)
|
||||
INCLUDES := -I../../common
|
||||
|
||||
EXECUTABLE=./stream
|
||||
|
||||
@@ -40,7 +41,7 @@ EXECUTABLE=./stream
|
||||
|
||||
all: $(EXECUTABLE) test
|
||||
|
||||
CXXFLAGS =-g
|
||||
CXXFLAGS =-g $(INCLUDES)
|
||||
CXX=$(HIPCC)
|
||||
|
||||
|
||||
|
||||
@@ -22,6 +22,7 @@ THE SOFTWARE.
|
||||
|
||||
#include <iostream>
|
||||
#include <hip/hip_runtime.h>
|
||||
#include "hip_helper.h"
|
||||
|
||||
#define WIDTH 32
|
||||
|
||||
@@ -66,11 +67,11 @@ void MultipleStream(float** data, float* randArray, float** gpuTransposeMatrix,
|
||||
const int num_streams = 2;
|
||||
hipStream_t streams[num_streams];
|
||||
|
||||
for (int i = 0; i < num_streams; i++) hipStreamCreate(&streams[i]);
|
||||
for (int i = 0; i < num_streams; i++) checkHipErrors(hipStreamCreate(&streams[i]));
|
||||
|
||||
for (int i = 0; i < num_streams; i++) {
|
||||
hipMalloc((void**)&data[i], NUM * sizeof(float));
|
||||
hipMemcpyAsync(data[i], randArray, NUM * sizeof(float), hipMemcpyHostToDevice, streams[i]);
|
||||
checkHipErrors(hipMalloc((void**)&data[i], NUM * sizeof(float)));
|
||||
checkHipErrors(hipMemcpyAsync(data[i], randArray, NUM * sizeof(float), hipMemcpyHostToDevice, streams[i]));
|
||||
}
|
||||
|
||||
hipLaunchKernelGGL(matrixTranspose_static_shared,
|
||||
@@ -84,12 +85,12 @@ void MultipleStream(float** data, float* randArray, float** gpuTransposeMatrix,
|
||||
streams[1], gpuTransposeMatrix[1], data[1], width);
|
||||
|
||||
for (int i = 0; i < num_streams; i++)
|
||||
hipMemcpyAsync(TransposeMatrix[i], gpuTransposeMatrix[i], NUM * sizeof(float),
|
||||
hipMemcpyDeviceToHost, streams[i]);
|
||||
checkHipErrors(hipMemcpyAsync(TransposeMatrix[i], gpuTransposeMatrix[i], NUM * sizeof(float),
|
||||
hipMemcpyDeviceToHost, streams[i]));
|
||||
}
|
||||
|
||||
int main() {
|
||||
hipSetDevice(0);
|
||||
checkHipErrors(hipSetDevice(0));
|
||||
|
||||
float *data[2], *TransposeMatrix[2], *gpuTransposeMatrix[2], *randArray;
|
||||
|
||||
@@ -100,8 +101,8 @@ int main() {
|
||||
TransposeMatrix[0] = (float*)malloc(NUM * sizeof(float));
|
||||
TransposeMatrix[1] = (float*)malloc(NUM * sizeof(float));
|
||||
|
||||
hipMalloc((void**)&gpuTransposeMatrix[0], NUM * sizeof(float));
|
||||
hipMalloc((void**)&gpuTransposeMatrix[1], NUM * sizeof(float));
|
||||
checkHipErrors(hipMalloc((void**)&gpuTransposeMatrix[0], NUM * sizeof(float)));
|
||||
checkHipErrors(hipMalloc((void**)&gpuTransposeMatrix[1], NUM * sizeof(float)));
|
||||
|
||||
for (int i = 0; i < NUM; i++) {
|
||||
randArray[i] = (float)i * 1.0f;
|
||||
@@ -109,7 +110,7 @@ int main() {
|
||||
|
||||
MultipleStream(data, randArray, gpuTransposeMatrix, TransposeMatrix, width);
|
||||
|
||||
hipDeviceSynchronize();
|
||||
checkHipErrors(hipDeviceSynchronize());
|
||||
|
||||
// verify the results
|
||||
int errors = 0;
|
||||
@@ -128,11 +129,11 @@ int main() {
|
||||
|
||||
free(randArray);
|
||||
for (int i = 0; i < 2; i++) {
|
||||
hipFree(data[i]);
|
||||
hipFree(gpuTransposeMatrix[i]);
|
||||
checkHipErrors(hipFree(data[i]));
|
||||
checkHipErrors(hipFree(gpuTransposeMatrix[i]));
|
||||
free(TransposeMatrix[i]);
|
||||
}
|
||||
|
||||
hipDeviceReset();
|
||||
checkHipErrors(hipDeviceReset());
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -39,5 +39,7 @@ set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE})
|
||||
# Create the excutable
|
||||
add_executable(unroll unroll.cpp)
|
||||
|
||||
target_include_directories(unroll PRIVATE ../../common)
|
||||
|
||||
# Link with HIP
|
||||
target_link_libraries(unroll hip::host)
|
||||
|
||||
@@ -36,6 +36,7 @@ TARGET=hcc
|
||||
|
||||
SOURCES = unroll.cpp
|
||||
OBJECTS = $(SOURCES:.cpp=.o)
|
||||
INCLUDES := -I../../common
|
||||
|
||||
EXECUTABLE=./unroll
|
||||
|
||||
@@ -44,7 +45,7 @@ EXECUTABLE=./unroll
|
||||
|
||||
all: $(EXECUTABLE) test
|
||||
|
||||
CXXFLAGS =-g
|
||||
CXXFLAGS =-g $(INCLUDES)
|
||||
CXX=$(HIPCC)
|
||||
|
||||
|
||||
|
||||
@@ -24,6 +24,7 @@ THE SOFTWARE.
|
||||
|
||||
// hip header file
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip_helper.h"
|
||||
|
||||
#define LENGTH 4
|
||||
|
||||
@@ -59,7 +60,7 @@ int main() {
|
||||
int* gpuSumMatrix;
|
||||
|
||||
hipDeviceProp_t devProp;
|
||||
hipGetDeviceProperties(&devProp, 0);
|
||||
checkHipErrors(hipGetDeviceProperties(&devProp, 0));
|
||||
|
||||
std::cout << "Device name " << devProp.name << std::endl;
|
||||
|
||||
@@ -76,19 +77,19 @@ int main() {
|
||||
}
|
||||
|
||||
// Allocated Device Memory
|
||||
hipMalloc((void**)&gpuMatrix, SIZE * sizeof(int));
|
||||
hipMalloc((void**)&gpuSumMatrix, LENGTH * sizeof(int));
|
||||
checkHipErrors(hipMalloc((void**)&gpuMatrix, SIZE * sizeof(int)));
|
||||
checkHipErrors(hipMalloc((void**)&gpuSumMatrix, LENGTH * sizeof(int)));
|
||||
|
||||
// Memory Copy to Device
|
||||
hipMemcpy(gpuMatrix, Matrix, SIZE * sizeof(int), hipMemcpyHostToDevice);
|
||||
hipMemcpy(gpuSumMatrix, cpuSumMatrix, LENGTH * sizeof(float), hipMemcpyHostToDevice);
|
||||
checkHipErrors(hipMemcpy(gpuMatrix, Matrix, SIZE * sizeof(int), hipMemcpyHostToDevice));
|
||||
checkHipErrors(hipMemcpy(gpuSumMatrix, cpuSumMatrix, LENGTH * sizeof(float), hipMemcpyHostToDevice));
|
||||
|
||||
// Launch device kernels
|
||||
hipLaunchKernelGGL(gpuMatrixRowSum, dim3(BLOCKS_PER_GRID), dim3(THREADS_PER_BLOCK), 0, 0,
|
||||
gpuMatrix, gpuSumMatrix, LENGTH);
|
||||
|
||||
// Memory copy back to device
|
||||
hipMemcpy(sumMatrix, gpuSumMatrix, LENGTH * sizeof(int), hipMemcpyDeviceToHost);
|
||||
checkHipErrors(hipMemcpy(sumMatrix, gpuSumMatrix, LENGTH * sizeof(int), hipMemcpyDeviceToHost));
|
||||
|
||||
// Cpu implementation
|
||||
matrixRowSum(Matrix, cpuSumMatrix, LENGTH);
|
||||
@@ -110,8 +111,8 @@ int main() {
|
||||
}
|
||||
|
||||
// GPU Free
|
||||
hipFree(gpuMatrix);
|
||||
hipFree(gpuSumMatrix);
|
||||
checkHipErrors(hipFree(gpuMatrix));
|
||||
checkHipErrors(hipFree(gpuSumMatrix));
|
||||
|
||||
// CPU Free
|
||||
free(Matrix);
|
||||
|
||||
@@ -0,0 +1,38 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
#ifndef checkHipErrors
|
||||
#define checkHipErrors(err) __checkHipErrors(err, __FILE__, __LINE__)
|
||||
|
||||
inline void __checkHipErrors(hipError_t err, const char *file, const int line) {
|
||||
if (HIP_SUCCESS != err) {
|
||||
const char *errorStr = hipGetErrorString(err);
|
||||
fprintf(stderr,
|
||||
"checkHipErrors() HIP API error = %04d \"%s\" from file <%s>, "
|
||||
"line %i.\n",
|
||||
err, errorStr, file, line);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
Reference in New Issue
Block a user