From 7cc53f992ffc070b7dea7dae82f99db161f59098 Mon Sep 17 00:00:00 2001 From: ROCm CI Service Account <66695075+rocm-ci@users.noreply.github.com> Date: Thu, 20 Jul 2023 10:22:17 +0530 Subject: [PATCH] SWDEV-402381 - Add hipCheckErrors for HIP API calls in samples (#375) Change-Id: I335d7e780362fc59fd2d90939b4c8b8a7231ffc7 --- samples/0_Intro/bit_extract/CMakeLists.txt | 2 + samples/0_Intro/bit_extract/Makefile | 3 +- samples/0_Intro/bit_extract/bit_extract.cpp | 29 ++++------- samples/0_Intro/module_api/CMakeLists.txt | 2 + samples/0_Intro/module_api/Makefile | 9 ++-- samples/0_Intro/module_api/defaultDriver.cpp | 27 +++++----- .../0_Intro/module_api/launchKernelHcc.cpp | 33 ++++++------- samples/0_Intro/module_api/runKernel.cpp | 31 +++++------- .../0_Intro/module_api_global/CMakeLists.txt | 2 + samples/0_Intro/module_api_global/Makefile | 3 +- .../0_Intro/module_api_global/runKernel.cpp | 24 ++++----- .../1_Utils/hipDispatchLatency/CMakeLists.txt | 2 + samples/1_Utils/hipDispatchLatency/Makefile | 3 +- .../hipDispatchEnqueueRateMT.cpp | 27 ++++------ .../hipDispatchLatency/hipDispatchLatency.cpp | 37 +++++++------- samples/1_Utils/hipInfo/CMakeLists.txt | 2 + samples/1_Utils/hipInfo/Makefile | 3 +- samples/1_Utils/hipInfo/hipInfo.cpp | 29 +++-------- .../0_MatrixTranspose/CMakeLists.txt | 2 + samples/2_Cookbook/0_MatrixTranspose/Makefile | 3 +- .../0_MatrixTranspose/MatrixTranspose.cpp | 15 +++--- .../2_Cookbook/10_inline_asm/CMakeLists.txt | 2 + samples/2_Cookbook/10_inline_asm/Makefile | 4 +- .../2_Cookbook/10_inline_asm/inline_asm.cpp | 43 ++++++++-------- .../11_texture_driver/CMakeLists.txt | 2 + samples/2_Cookbook/11_texture_driver/Makefile | 3 +- .../11_texture_driver/texture2dDrv.cpp | 43 +++++++--------- .../CMakeLists.txt | 2 + .../MatrixTranspose.cpp | 15 +++--- .../2_Cookbook/13_occupancy/CMakeLists.txt | 2 + samples/2_Cookbook/13_occupancy/Makefile | 4 +- samples/2_Cookbook/13_occupancy/occupancy.cpp | 49 +++++++++---------- samples/2_Cookbook/14_gpu_arch/CMakeLists.txt | 2 + samples/2_Cookbook/14_gpu_arch/Makefile | 4 +- samples/2_Cookbook/14_gpu_arch/gpuarch.cpp | 14 ++---- .../16_assembly_to_executable/Makefile | 5 +- .../16_assembly_to_executable/square.cpp | 28 ++++------- .../17_llvm_ir_to_executable/Makefile | 5 +- .../17_llvm_ir_to_executable/square.cpp | 28 ++++------- .../18_cmake_hip_device/CMakeLists.txt | 4 ++ .../2_Cookbook/18_cmake_hip_device/square.cpp | 29 ++++------- .../2_Cookbook/19_cmake_lang/CMakeLists.txt | 3 ++ .../19_cmake_lang/MatrixTranspose.cpp | 16 +++--- samples/2_Cookbook/1_hipEvent/CMakeLists.txt | 2 + samples/2_Cookbook/1_hipEvent/Makefile | 3 +- samples/2_Cookbook/1_hipEvent/hipEvent.cpp | 43 ++++++++-------- .../21_cmake_hip_cxx_clang/CMakeLists.txt | 2 + .../21_cmake_hip_cxx_clang/square.cpp | 27 ++++------ .../22_cmake_hip_lang/CMakeLists.txt | 2 + .../2_Cookbook/22_cmake_hip_lang/square.hip | 27 ++++------ .../2_Cookbook/23_cmake_hiprtc/CMakeLists.txt | 2 + samples/2_Cookbook/23_cmake_hiprtc/saxpy.cpp | 31 ++++++------ .../2_Cookbook/3_shared_memory/CMakeLists.txt | 2 + samples/2_Cookbook/3_shared_memory/Makefile | 3 +- .../3_shared_memory/sharedMemory.cpp | 16 +++--- samples/2_Cookbook/4_shfl/CMakeLists.txt | 2 + samples/2_Cookbook/4_shfl/Makefile | 3 +- samples/2_Cookbook/4_shfl/shfl.cpp | 16 +++--- samples/2_Cookbook/5_2dshfl/2dshfl.cpp | 15 +++--- samples/2_Cookbook/5_2dshfl/CMakeLists.txt | 2 + samples/2_Cookbook/5_2dshfl/Makefile | 3 +- .../6_dynamic_shared/CMakeLists.txt | 4 ++ samples/2_Cookbook/6_dynamic_shared/Makefile | 3 +- .../6_dynamic_shared/dynamic_shared.cpp | 15 +++--- samples/2_Cookbook/7_streams/CMakeLists.txt | 2 + samples/2_Cookbook/7_streams/Makefile | 3 +- samples/2_Cookbook/7_streams/stream.cpp | 25 +++++----- samples/2_Cookbook/9_unroll/CMakeLists.txt | 2 + samples/2_Cookbook/9_unroll/Makefile | 3 +- samples/2_Cookbook/9_unroll/unroll.cpp | 17 ++++--- samples/common/hip_helper.h | 38 ++++++++++++++ 71 files changed, 460 insertions(+), 448 deletions(-) create mode 100644 samples/common/hip_helper.h diff --git a/samples/0_Intro/bit_extract/CMakeLists.txt b/samples/0_Intro/bit_extract/CMakeLists.txt index d51e4d974a..4986dc1b1f 100644 --- a/samples/0_Intro/bit_extract/CMakeLists.txt +++ b/samples/0_Intro/bit_extract/CMakeLists.txt @@ -48,5 +48,7 @@ set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE}) # Create the excutable add_executable(bit_extract bit_extract.cpp) +target_include_directories(bit_extract PRIVATE ../../common) + # Link with HIP target_link_libraries(bit_extract hip::host) diff --git a/samples/0_Intro/bit_extract/Makefile b/samples/0_Intro/bit_extract/Makefile index 6f4d824ba8..939aca00a5 100644 --- a/samples/0_Intro/bit_extract/Makefile +++ b/samples/0_Intro/bit_extract/Makefile @@ -29,6 +29,7 @@ ifeq (,$(HIP_PATH)) endif HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --platform) HIPCC=$(HIP_PATH)/bin/hipcc +INCLUDES := -I../../common # Show how to use PLATFORM to specify different options for each compiler: ifeq (${HIP_PLATFORM}, nvcc) @@ -38,7 +39,7 @@ endif EXE=bit_extract $(EXE): bit_extract.cpp - $(HIPCC) $(HIPCC_FLAGS) $< -o $@ + $(HIPCC) $(HIPCC_FLAGS) $(INCLUDES) $< -o $@ all: $(EXE) diff --git a/samples/0_Intro/bit_extract/bit_extract.cpp b/samples/0_Intro/bit_extract/bit_extract.cpp index cf0440dd57..3f7636bc08 100644 --- a/samples/0_Intro/bit_extract/bit_extract.cpp +++ b/samples/0_Intro/bit_extract/bit_extract.cpp @@ -23,16 +23,7 @@ THE SOFTWARE. #include #include #include "hip/hip_runtime.h" - -#define CHECK(cmd) \ - { \ - hipError_t error = cmd; \ - if (error != hipSuccess) { \ - fprintf(stderr, "error: '%s'(%d) at %s:%d\n", hipGetErrorString(error), error, \ - __FILE__, __LINE__); \ - exit(EXIT_FAILURE); \ - } \ - } +#include "hip_helper.h" __global__ void bit_extract_kernel(uint32_t* C_d, const uint32_t* A_d, size_t N) { size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); @@ -69,28 +60,28 @@ int main(int argc, char* argv[]) { #endif int deviceId; - CHECK(hipGetDevice(&deviceId)); + checkHipErrors(hipGetDevice(&deviceId)); hipDeviceProp_t props; - CHECK(hipGetDeviceProperties(&props, deviceId)); + checkHipErrors(hipGetDeviceProperties(&props, deviceId)); printf("info: running on device #%d %s\n", deviceId, props.name); printf("info: allocate host mem (%6.2f MB)\n", 2 * Nbytes / 1024.0 / 1024.0); A_h = (uint32_t*)malloc(Nbytes); - CHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess); + checkHipErrors(A_h == 0 ? hipErrorOutOfMemory : hipSuccess); C_h = (uint32_t*)malloc(Nbytes); - CHECK(C_h == 0 ? hipErrorOutOfMemory : hipSuccess); + checkHipErrors(C_h == 0 ? hipErrorOutOfMemory : hipSuccess); for (size_t i = 0; i < N; i++) { A_h[i] = i; } printf("info: allocate device mem (%6.2f MB)\n", 2 * Nbytes / 1024.0 / 1024.0); - CHECK(hipMalloc(&A_d, Nbytes)); - CHECK(hipMalloc(&C_d, Nbytes)); + checkHipErrors(hipMalloc(&A_d, Nbytes)); + checkHipErrors(hipMalloc(&C_d, Nbytes)); printf("info: copy Host2Device\n"); - CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); + checkHipErrors(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); printf("info: launch 'bit_extract_kernel' \n"); const unsigned blocks = 512; @@ -98,7 +89,7 @@ int main(int argc, char* argv[]) { hipLaunchKernelGGL(bit_extract_kernel, dim3(blocks), dim3(threadsPerBlock), 0, 0, C_d, A_d, N); printf("info: copy Device2Host\n"); - CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); + checkHipErrors(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); printf("info: check result\n"); for (size_t i = 0; i < N; i++) { @@ -106,7 +97,7 @@ int main(int argc, char* argv[]) { if (C_h[i] != Agold) { fprintf(stderr, "mismatch detected.\n"); printf("%zu: %08x =? %08x (Ain=%08x)\n", i, C_h[i], Agold, A_h[i]); - CHECK(hipErrorUnknown); + checkHipErrors(hipErrorUnknown); } } printf("PASSED!\n"); diff --git a/samples/0_Intro/module_api/CMakeLists.txt b/samples/0_Intro/module_api/CMakeLists.txt index cefe6e2c79..7106df44c0 100644 --- a/samples/0_Intro/module_api/CMakeLists.txt +++ b/samples/0_Intro/module_api/CMakeLists.txt @@ -22,6 +22,8 @@ project(module_api) cmake_minimum_required(VERSION 3.10) +include_directories(../../common) + if (NOT DEFINED ROCM_PATH ) set ( ROCM_PATH "/opt/rocm" CACHE STRING "Default ROCM installation directory." ) endif () diff --git a/samples/0_Intro/module_api/Makefile b/samples/0_Intro/module_api/Makefile index 118d16a7ef..ee3e68e067 100644 --- a/samples/0_Intro/module_api/Makefile +++ b/samples/0_Intro/module_api/Makefile @@ -27,20 +27,21 @@ ifeq (,$(HIP_PATH)) endif HIPCC=$(HIP_PATH)/bin/hipcc HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --compiler) +INCLUDES := -I../../common all: vcpy_kernel.code runKernel.hip.out launchKernelHcc.hip.out defaultDriver.hip.out runKernel.hip.out: runKernel.cpp - $(HIPCC) $(HIPCC_FLAGS) $< -o $@ + $(HIPCC) $(HIPCC_FLAGS) $(INCLUDES) $< -o $@ launchKernelHcc.hip.out: launchKernelHcc.cpp - $(HIPCC) $(HIPCC_FLAGS) $< -o $@ + $(HIPCC) $(HIPCC_FLAGS) $(INCLUDES) $< -o $@ defaultDriver.hip.out: defaultDriver.cpp - $(HIPCC) $(HIPCC_FLAGS) $< -o $@ + $(HIPCC) $(HIPCC_FLAGS) $(INCLUDES) $< -o $@ vcpy_kernel.code: vcpy_kernel.cpp - $(HIPCC) --genco $(GENCO_FLAGS) $^ -o $@ + $(HIPCC) --genco $(GENCO_FLAGS) $(INCLUDES) $^ -o $@ clean: rm -f *.code *.out diff --git a/samples/0_Intro/module_api/defaultDriver.cpp b/samples/0_Intro/module_api/defaultDriver.cpp index 9443842026..fc759f3f1c 100644 --- a/samples/0_Intro/module_api/defaultDriver.cpp +++ b/samples/0_Intro/module_api/defaultDriver.cpp @@ -24,6 +24,7 @@ THE SOFTWARE. #include #include #include +#include "hip_helper.h" #define LEN 64 #define SIZE LEN << 2 @@ -45,25 +46,25 @@ int main() { hipInit(0); hipDevice_t device; hipCtx_t context; - hipDeviceGet(&device, 0); - hipCtxCreate(&context, 0, device); + checkHipErrors(hipDeviceGet(&device, 0)); + checkHipErrors(hipCtxCreate(&context, 0, device)); - hipMalloc((void**)&Ad, SIZE); - hipMalloc((void**)&Bd, SIZE); + checkHipErrors(hipMalloc((void**)&Ad, SIZE)); + checkHipErrors(hipMalloc((void**)&Bd, SIZE)); - hipMemcpyHtoD(Ad, A, SIZE); - hipMemcpyHtoD(Bd, B, SIZE); + checkHipErrors(hipMemcpyHtoD(Ad, A, SIZE)); + checkHipErrors(hipMemcpyHtoD(Bd, B, SIZE)); hipModule_t Module; hipFunction_t Function; - hipModuleLoad(&Module, fileName); - hipModuleGetFunction(&Function, Module, kernel_name); + checkHipErrors(hipModuleLoad(&Module, fileName)); + checkHipErrors(hipModuleGetFunction(&Function, Module, kernel_name)); void* args[2] = {&Ad, &Bd}; - hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, args, nullptr); + checkHipErrors(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, args, nullptr)); - hipMemcpyDtoH(B, Bd, SIZE); + checkHipErrors(hipMemcpyDtoH(B, Bd, SIZE)); int mismatchCount = 0; for (uint32_t i = 0; i < LEN; i++) { if (A[i] != B[i]) { @@ -78,10 +79,10 @@ int main() { std::cout << "FAILED!\n"; }; - hipFree(Ad); - hipFree(Bd); + checkHipErrors(hipFree(Ad)); + checkHipErrors(hipFree(Bd)); delete[] A; delete[] B; - hipCtxDestroy(context); + checkHipErrors(hipCtxDestroy(context)); return 0; } diff --git a/samples/0_Intro/module_api/launchKernelHcc.cpp b/samples/0_Intro/module_api/launchKernelHcc.cpp index 464f6d8851..3c44a375c7 100644 --- a/samples/0_Intro/module_api/launchKernelHcc.cpp +++ b/samples/0_Intro/module_api/launchKernelHcc.cpp @@ -25,6 +25,7 @@ THE SOFTWARE. #include #include #include +#include "hip_helper.h" #ifdef __HIP_PLATFORM_AMD__ #include @@ -36,12 +37,6 @@ THE SOFTWARE. #define fileName "vcpy_kernel.code" #define kernel_name "hello_world" -#define HIP_CHECK(status) \ - if (status != hipSuccess) { \ - std::cout << "Got Status: " << status << " at Line: " << __LINE__ << std::endl; \ - exit(0); \ - } - int main() { float *A, *B; hipDeviceptr_t Ad, Bd; @@ -56,18 +51,18 @@ int main() { hipInit(0); hipDevice_t device; hipCtx_t context; - hipDeviceGet(&device, 0); - hipCtxCreate(&context, 0, device); + checkHipErrors(hipDeviceGet(&device, 0)); + checkHipErrors(hipCtxCreate(&context, 0, device)); - hipMalloc((void**)&Ad, SIZE); - hipMalloc((void**)&Bd, SIZE); + checkHipErrors(hipMalloc((void**)&Ad, SIZE)); + checkHipErrors(hipMalloc((void**)&Bd, SIZE)); - hipMemcpyHtoD(Ad, A, SIZE); - hipMemcpyHtoD(Bd, B, SIZE); + checkHipErrors(hipMemcpyHtoD(Ad, A, SIZE)); + checkHipErrors(hipMemcpyHtoD(Bd, B, SIZE)); hipModule_t Module; hipFunction_t Function; - HIP_CHECK(hipModuleLoad(&Module, fileName)); - HIP_CHECK(hipModuleGetFunction(&Function, Module, kernel_name)); + checkHipErrors(hipModuleLoad(&Module, fileName)); + checkHipErrors(hipModuleGetFunction(&Function, Module, kernel_name)); struct { void* _Ad; @@ -83,10 +78,10 @@ int main() { void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, HIP_LAUNCH_PARAM_END}; - HIP_CHECK( + checkHipErrors( hipExtModuleLaunchKernel(Function, LEN, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config, 0)); - hipMemcpyDtoH(B, Bd, SIZE); + checkHipErrors(hipMemcpyDtoH(B, Bd, SIZE)); int mismatchCount = 0; for (uint32_t i = 0; i < LEN; i++) { @@ -102,10 +97,10 @@ int main() { std::cout << "FAILED!\n"; }; - hipFree(Ad); - hipFree(Bd); + checkHipErrors(hipFree(Ad)); + checkHipErrors(hipFree(Bd)); delete[] A; delete[] B; - hipCtxDestroy(context); + checkHipErrors(hipCtxDestroy(context)); return 0; } diff --git a/samples/0_Intro/module_api/runKernel.cpp b/samples/0_Intro/module_api/runKernel.cpp index c2de0c6c0d..f4af0332fa 100644 --- a/samples/0_Intro/module_api/runKernel.cpp +++ b/samples/0_Intro/module_api/runKernel.cpp @@ -26,6 +26,7 @@ THE SOFTWARE. #include #include #include +#include "hip_helper.h" #define LEN 64 #define SIZE LEN << 2 @@ -33,12 +34,6 @@ THE SOFTWARE. #define fileName "vcpy_kernel.code" #define kernel_name "hello_world" -#define HIP_CHECK(status) \ - if (status != hipSuccess) { \ - std::cout << "Got Status: " << status << " at Line: " << __LINE__ << std::endl; \ - exit(0); \ - } - int main() { float *A, *B; hipDeviceptr_t Ad, Bd; @@ -53,18 +48,18 @@ int main() { hipInit(0); hipDevice_t device; hipCtx_t context; - hipDeviceGet(&device, 0); - hipCtxCreate(&context, 0, device); + checkHipErrors(hipDeviceGet(&device, 0)); + checkHipErrors(hipCtxCreate(&context, 0, device)); - hipMalloc((void**)&Ad, SIZE); - hipMalloc((void**)&Bd, SIZE); + checkHipErrors(hipMalloc((void**)&Ad, SIZE)); + checkHipErrors(hipMalloc((void**)&Bd, SIZE)); - hipMemcpyHtoD(Ad, A, SIZE); - hipMemcpyHtoD(Bd, B, SIZE); + checkHipErrors(hipMemcpyHtoD(Ad, A, SIZE)); + checkHipErrors(hipMemcpyHtoD(Bd, B, SIZE)); hipModule_t Module; hipFunction_t Function; - HIP_CHECK(hipModuleLoad(&Module, fileName)); - HIP_CHECK(hipModuleGetFunction(&Function, Module, kernel_name)); + checkHipErrors(hipModuleLoad(&Module, fileName)); + checkHipErrors(hipModuleGetFunction(&Function, Module, kernel_name)); struct { void* _Ad; @@ -79,9 +74,9 @@ int main() { void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, HIP_LAUNCH_PARAM_END}; - HIP_CHECK(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config)); + checkHipErrors(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config)); - hipMemcpyDtoH(B, Bd, SIZE); + checkHipErrors(hipMemcpyDtoH(B, Bd, SIZE)); int mismatchCount = 0; for (uint32_t i = 0; i < LEN; i++) { @@ -97,10 +92,10 @@ int main() { std::cout << "FAILED!\n"; }; - hipFree(Ad); + checkHipErrors(hipFree(Ad)); hipFree(Bd); delete[] A; delete[] B; - hipCtxDestroy(context); + checkHipErrors(hipCtxDestroy(context)); return 0; } diff --git a/samples/0_Intro/module_api_global/CMakeLists.txt b/samples/0_Intro/module_api_global/CMakeLists.txt index c3b147c60c..186a99d164 100644 --- a/samples/0_Intro/module_api_global/CMakeLists.txt +++ b/samples/0_Intro/module_api_global/CMakeLists.txt @@ -50,5 +50,7 @@ add_custom_target( add_dependencies(runKernel.hip.out codeobj) +target_include_directories(runKernel.hip.out PRIVATE ../../common) + # Link with HIP target_link_libraries(runKernel.hip.out hip::host) \ No newline at end of file diff --git a/samples/0_Intro/module_api_global/Makefile b/samples/0_Intro/module_api_global/Makefile index fdbc77f65f..732eec63aa 100644 --- a/samples/0_Intro/module_api_global/Makefile +++ b/samples/0_Intro/module_api_global/Makefile @@ -27,11 +27,12 @@ ifeq (,$(HIP_PATH)) endif HIPCC=$(HIP_PATH)/bin/hipcc HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --compiler) +INCLUDES := -I../../common all: vcpy_kernel.code runKernel.hip.out runKernel.hip.out: runKernel.cpp - $(HIPCC) $(HIPCC_FLAGS) $< -o $@ + $(HIPCC) $(HIPCC_FLAGS) $(INCLUDES) $< -o $@ vcpy_kernel.code: vcpy_kernel.cpp $(HIPCC) --genco $(GENCO_FLAGS) $^ -o $@ diff --git a/samples/0_Intro/module_api_global/runKernel.cpp b/samples/0_Intro/module_api_global/runKernel.cpp index 23f4ec2750..40711dd4b2 100644 --- a/samples/0_Intro/module_api_global/runKernel.cpp +++ b/samples/0_Intro/module_api_global/runKernel.cpp @@ -31,7 +31,7 @@ THE SOFTWARE. #define SIZE LEN * sizeof(float) #define fileName "vcpy_kernel.code" -#define HIP_CHECK(cmd) \ +#define checkHipErrors(cmd) \ { \ hipError_t status = cmd; \ if (status != hipSuccess) { \ @@ -64,23 +64,23 @@ int main() { hipMemcpyHtoD(hipDeviceptr_t(Ad), A, SIZE); hipMemcpyHtoD((hipDeviceptr_t)(Bd), B, SIZE); hipModule_t Module; - HIP_CHECK(hipModuleLoad(&Module, fileName)); + checkHipErrors(hipModuleLoad(&Module, fileName)); float myDeviceGlobal_h = 42.0; float* deviceGlobal; size_t deviceGlobalSize; - HIP_CHECK(hipModuleGetGlobal((void**)&deviceGlobal, &deviceGlobalSize, Module, "myDeviceGlobal")); - HIP_CHECK(hipMemcpyHtoD(hipDeviceptr_t(deviceGlobal), &myDeviceGlobal_h, deviceGlobalSize)); + checkHipErrors(hipModuleGetGlobal((void**)&deviceGlobal, &deviceGlobalSize, Module, "myDeviceGlobal")); + checkHipErrors(hipMemcpyHtoD(hipDeviceptr_t(deviceGlobal), &myDeviceGlobal_h, deviceGlobalSize)); #define ARRAY_SIZE 16 float myDeviceGlobalArray_h[ARRAY_SIZE]; float *myDeviceGlobalArray; size_t myDeviceGlobalArraySize; - HIP_CHECK(hipModuleGetGlobal((void**)&myDeviceGlobalArray, &myDeviceGlobalArraySize, Module, "myDeviceGlobalArray")); + checkHipErrors(hipModuleGetGlobal((void**)&myDeviceGlobalArray, &myDeviceGlobalArraySize, Module, "myDeviceGlobalArray")); for (int i = 0; i < ARRAY_SIZE; i++) { myDeviceGlobalArray_h[i] = i * 1000.0f; - HIP_CHECK(hipMemcpyHtoD(hipDeviceptr_t(myDeviceGlobalArray), &myDeviceGlobalArray_h, myDeviceGlobalArraySize)); + checkHipErrors(hipMemcpyHtoD(hipDeviceptr_t(myDeviceGlobalArray), &myDeviceGlobalArray_h, myDeviceGlobalArraySize)); } struct { @@ -98,8 +98,8 @@ int main() { { hipFunction_t Function; - HIP_CHECK(hipModuleGetFunction(&Function, Module, "hello_world")); - HIP_CHECK(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config)); + checkHipErrors(hipModuleGetFunction(&Function, Module, "hello_world")); + checkHipErrors(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config)); hipMemcpyDtoH(B, Bd, SIZE); @@ -123,13 +123,13 @@ int main() { { hipFunction_t Function; - HIP_CHECK(hipModuleGetFunction(&Function, Module, "test_globals")); + checkHipErrors(hipModuleGetFunction(&Function, Module, "test_globals")); int val =-1; - HIP_CHECK(hipFuncGetAttribute(&val, HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES,Function)); + checkHipErrors(hipFuncGetAttribute(&val, HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES,Function)); printf("Shared Size Bytes = %d\n",val); - HIP_CHECK(hipFuncGetAttribute(&val, HIP_FUNC_ATTRIBUTE_NUM_REGS, Function)); + checkHipErrors(hipFuncGetAttribute(&val, HIP_FUNC_ATTRIBUTE_NUM_REGS, Function)); printf("Num Regs = %d\n",val); - HIP_CHECK(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config)); + checkHipErrors(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config)); hipMemcpyDtoH(B, Bd, SIZE); diff --git a/samples/1_Utils/hipDispatchLatency/CMakeLists.txt b/samples/1_Utils/hipDispatchLatency/CMakeLists.txt index d0a453f1d5..33caab73b7 100644 --- a/samples/1_Utils/hipDispatchLatency/CMakeLists.txt +++ b/samples/1_Utils/hipDispatchLatency/CMakeLists.txt @@ -22,6 +22,8 @@ project(hipDispatchLatency) cmake_minimum_required(VERSION 3.10) +include_directories(../../common) + if (NOT DEFINED ROCM_PATH ) set ( ROCM_PATH "/opt/rocm" CACHE STRING "Default ROCM installation directory." ) endif () diff --git a/samples/1_Utils/hipDispatchLatency/Makefile b/samples/1_Utils/hipDispatchLatency/Makefile index 988827071c..6643e3f034 100644 --- a/samples/1_Utils/hipDispatchLatency/Makefile +++ b/samples/1_Utils/hipDispatchLatency/Makefile @@ -26,8 +26,9 @@ ifeq (,$(HIP_PATH)) HIP_PATH=../../.. endif HIPCC=$(HIP_PATH)/bin/hipcc -std=c++11 +INCLUDES := -I../../common -CXXFLAGS = -O3 +CXXFLAGS = -O3 $(INCLUDES) all: test_kernel.code hipDispatchLatency.out hipDispatchEnqueueRateMT.out diff --git a/samples/1_Utils/hipDispatchLatency/hipDispatchEnqueueRateMT.cpp b/samples/1_Utils/hipDispatchLatency/hipDispatchEnqueueRateMT.cpp index 9bc3258b4d..aced502d8f 100644 --- a/samples/1_Utils/hipDispatchLatency/hipDispatchEnqueueRateMT.cpp +++ b/samples/1_Utils/hipDispatchLatency/hipDispatchEnqueueRateMT.cpp @@ -22,6 +22,7 @@ THE SOFTWARE. #ifdef __HIP_PLATFORM_AMD__ #include "hip/hip_ext.h" #endif +#include "hip_helper.h" #include #include #include @@ -41,16 +42,6 @@ THE SOFTWARE. #define failed(...) \ abort(); -#define HIPCHECK(error) \ - { \ - hipError_t localError = error; \ - if ((localError != hipSuccess) && (localError != hipErrorPeerAccessAlreadyEnabled)) { \ - printf("error: '%s'(%d) from %s at %s:%d\n", hipGetErrorString(localError), \ - localError, #error, __FILE__, __LINE__); \ - failed("API returned error code."); \ - } \ - } - __global__ void EmptyKernel() {} @@ -87,12 +78,12 @@ void hipModuleLaunchKernel_enqueue_rate(const std::vector& buffer, std::at { //resources necessary for this thread hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); + checkHipErrors(hipStreamCreate(&stream)); hipModule_t module; hipFunction_t function; - HIPCHECK(hipModuleLoadData(&module, &buffer[0])); - HIPCHECK(hipModuleGetFunction(&function, module, "test")); + checkHipErrors(hipModuleLoadData(&module, &buffer[0])); + checkHipErrors(hipModuleGetFunction(&function, module, "test")); void* kernel_params = nullptr; std::array results; @@ -103,13 +94,13 @@ void hipModuleLaunchKernel_enqueue_rate(const std::vector& buffer, std::at for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) { auto start = std::chrono::high_resolution_clock::now(); - HIPCHECK(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, stream, &kernel_params, nullptr)); + checkHipErrors(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, stream, &kernel_params, nullptr)); auto stop = std::chrono::high_resolution_clock::now(); results[i] = std::chrono::duration(stop - start).count(); } - HIPCHECK(hipModuleUnload(module)); + checkHipErrors(hipModuleUnload(module)); print_timing("Thread ID : " + std::to_string(tid) + " , " + "hipModuleLaunchKernel enqueue rate", results); - HIPCHECK(hipStreamDestroy(stream)); + checkHipErrors(hipStreamDestroy(stream)); } // Measure time taken to enqueue a kernel on the GPU using hipLaunchKernelGGL @@ -117,7 +108,7 @@ void hipLaunchKernelGGL_enqueue_rate(const std::vector& buffer, std::atomi { //resources necessary for this thread hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); + checkHipErrors(hipStreamCreate(&stream)); std::array results; //synchronize all threads, before running @@ -131,7 +122,7 @@ void hipLaunchKernelGGL_enqueue_rate(const std::vector& buffer, std::atomi results[i] = std::chrono::duration(stop - start).count(); } print_timing("Thread ID : " + std::to_string(tid) + " , " + "hipLaunchKernelGGL enqueue rate", results); - HIPCHECK(hipStreamDestroy(stream)); + checkHipErrors(hipStreamDestroy(stream)); } // Simple thread pool diff --git a/samples/1_Utils/hipDispatchLatency/hipDispatchLatency.cpp b/samples/1_Utils/hipDispatchLatency/hipDispatchLatency.cpp index 5b96f3a199..525dafb45c 100644 --- a/samples/1_Utils/hipDispatchLatency/hipDispatchLatency.cpp +++ b/samples/1_Utils/hipDispatchLatency/hipDispatchLatency.cpp @@ -21,6 +21,7 @@ THE SOFTWARE. #ifdef __HIP_PLATFORM_AMD__ #include "hip/hip_ext.h" #endif +#include "hip_helper.h" #include #include #include @@ -66,19 +67,19 @@ void print_timing(std::string test, const std::array &re int main() { hipStream_t stream0 = 0; hipDevice_t device; - hipDeviceGet(&device, 0); + checkHipErrors(hipDeviceGet(&device, 0)); hipCtx_t context; - hipCtxCreate(&context, 0, device); + checkHipErrors(hipCtxCreate(&context, 0, device)); hipModule_t module; hipFunction_t function; - hipModuleLoad(&module, FILE_NAME); - hipModuleGetFunction(&function, module, KERNEL_NAME); + checkHipErrors(hipModuleLoad(&module, FILE_NAME)); + checkHipErrors(hipModuleGetFunction(&function, module, KERNEL_NAME)); void* params = nullptr; std::array results; hipEvent_t start, stop; - hipEventCreate(&start); - hipEventCreate(&stop); + checkHipErrors(hipEventCreate(&start)); + checkHipErrors(hipEventCreate(&stop)); /************************************************************************************/ /* HIP kernel launch enqueue rate: */ @@ -88,7 +89,7 @@ int main() { // Timing hipModuleLaunchKernel for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) { auto start = std::chrono::high_resolution_clock::now(); - hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0, ¶ms, nullptr); + checkHipErrors(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0, ¶ms, nullptr)); auto stop = std::chrono::high_resolution_clock::now(); results[i] = std::chrono::duration(stop - start).count(); } @@ -110,11 +111,11 @@ int main() { //Timing around the dispatch for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) { - hipEventRecord(start, 0); + checkHipErrors(hipEventRecord(start, 0)); hipLaunchKernelGGL((EmptyKernel), dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream0); - hipEventRecord(stop, 0); - hipEventSynchronize(stop); - hipEventElapsedTime(&results[i], start, stop); + checkHipErrors(hipEventRecord(stop, 0)); + checkHipErrors(hipEventSynchronize(stop)); + checkHipErrors(hipEventElapsedTime(&results[i], start, stop)); } print_timing("Timing around single dispatch latency", results); @@ -124,18 +125,18 @@ int main() { /*********************************************************************************/ for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) { - hipEventRecord(start, 0); + checkHipErrors(hipEventRecord(start, 0)); for (int j = 0; j < BATCH_SIZE; j++) { hipLaunchKernelGGL((EmptyKernel), dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream0); } - hipEventRecord(stop, 0); - hipEventSynchronize(stop); - hipEventElapsedTime(&results[i], start, stop); + checkHipErrors(hipEventRecord(stop, 0)); + checkHipErrors(hipEventSynchronize(stop)); + checkHipErrors(hipEventElapsedTime(&results[i], start, stop)); } print_timing("Batch dispatch latency", results, BATCH_SIZE); - hipEventDestroy(start); - hipEventDestroy(stop); - hipCtxDestroy(context); + checkHipErrors(hipEventDestroy(start)); + checkHipErrors(hipEventDestroy(stop)); + checkHipErrors(hipCtxDestroy(context)); } diff --git a/samples/1_Utils/hipInfo/CMakeLists.txt b/samples/1_Utils/hipInfo/CMakeLists.txt index 6192c8ecff..60cf123282 100644 --- a/samples/1_Utils/hipInfo/CMakeLists.txt +++ b/samples/1_Utils/hipInfo/CMakeLists.txt @@ -57,6 +57,8 @@ add_executable(hipInfo hipInfo.cpp) # Link with HIP target_link_libraries(hipInfo hip::host) +target_include_directories(hipInfo PRIVATE ../../common) + # Used only when make install is called # when hipInfo is built as part of compute project # hipInfo.exe will be installed to install/hip/bin path diff --git a/samples/1_Utils/hipInfo/Makefile b/samples/1_Utils/hipInfo/Makefile index c6c343dbd1..c5bc1a9c74 100644 --- a/samples/1_Utils/hipInfo/Makefile +++ b/samples/1_Utils/hipInfo/Makefile @@ -26,13 +26,14 @@ ifeq (,$(HIP_PATH)) HIP_PATH=../../.. endif HIPCC=$(HIP_PATH)/bin/hipcc +INCLUDES := -I../../common EXE=hipInfo all: install $(EXE): hipInfo.cpp - $(HIPCC) hipInfo.cpp -o $@ + $(HIPCC) hipInfo.cpp $(INCLUDES) -o $@ install: $(EXE) cp $(EXE) $(HIP_PATH)/bin diff --git a/samples/1_Utils/hipInfo/hipInfo.cpp b/samples/1_Utils/hipInfo/hipInfo.cpp index 28128b6d33..56978818d9 100644 --- a/samples/1_Utils/hipInfo/hipInfo.cpp +++ b/samples/1_Utils/hipInfo/hipInfo.cpp @@ -23,6 +23,7 @@ THE SOFTWARE. #include #include #include "hip/hip_runtime.h" +#include "hip_helper.h" #define KNRM "\x1B[0m" #define KRED "\x1B[31m" @@ -33,20 +34,6 @@ THE SOFTWARE. #define KCYN "\x1B[36m" #define KWHT "\x1B[37m" -#define failed(...) \ - printf("%serror: ", KRED); \ - printf(__VA_ARGS__); \ - printf("\n"); \ - printf("error: TEST FAILED\n%s", KNRM); \ - exit(EXIT_FAILURE); - -#define HIPCHECK(error) \ - if (error != hipSuccess) { \ - printf("%serror: '%s'(%d) at %s:%d%s\n", KRED, hipGetErrorString(error), error, __FILE__, \ - __LINE__, KNRM); \ - failed("API returned error code."); \ - } - void printCompilerInfo() { #ifdef __NVCC__ printf("compiler: nvcc\n"); @@ -76,7 +63,7 @@ void printDeviceProp(int deviceId) { cout << setw(w1) << "device#" << deviceId << endl; hipDeviceProp_t props = {0}; - HIPCHECK(hipGetDeviceProperties(&props, deviceId)); + checkHipErrors(hipGetDeviceProperties(&props, deviceId)); cout << setw(w1) << "Name: " << props.name << endl; cout << setw(w1) << "pciBusID: " << props.pciBusID << endl; @@ -149,11 +136,11 @@ void printDeviceProp(int deviceId) { cout << setw(w1) << "gcnArchName: " << props.gcnArchName << endl; #endif int deviceCnt; - hipGetDeviceCount(&deviceCnt); + checkHipErrors(hipGetDeviceCount(&deviceCnt)); cout << setw(w1) << "peers: "; for (int i = 0; i < deviceCnt; i++) { int isPeer; - hipDeviceCanAccessPeer(&isPeer, i, deviceId); + checkHipErrors(hipDeviceCanAccessPeer(&isPeer, i, deviceId)); if (isPeer) { cout << "device#" << i << " "; } @@ -162,7 +149,7 @@ void printDeviceProp(int deviceId) { cout << setw(w1) << "non-peers: "; for (int i = 0; i < deviceCnt; i++) { int isPeer; - hipDeviceCanAccessPeer(&isPeer, i, deviceId); + checkHipErrors(hipDeviceCanAccessPeer(&isPeer, i, deviceId)); if (!isPeer) { cout << "device#" << i << " "; } @@ -185,7 +172,7 @@ void printDeviceProp(int deviceId) { size_t free, total; - hipMemGetInfo(&free, &total); + checkHipErrors(hipMemGetInfo(&free, &total)); cout << fixed << setprecision(2); cout << setw(w1) << "memInfo.total: " << bytesToGB(total) << " GB" << endl; @@ -202,10 +189,10 @@ int main(int argc, char* argv[]) { int deviceCnt; - HIPCHECK(hipGetDeviceCount(&deviceCnt)); + checkHipErrors(hipGetDeviceCount(&deviceCnt)); for (int i = 0; i < deviceCnt; i++) { - hipSetDevice(i); + checkHipErrors(hipSetDevice(i)); printDeviceProp(i); } diff --git a/samples/2_Cookbook/0_MatrixTranspose/CMakeLists.txt b/samples/2_Cookbook/0_MatrixTranspose/CMakeLists.txt index ae4735c1ae..015e6a047e 100644 --- a/samples/2_Cookbook/0_MatrixTranspose/CMakeLists.txt +++ b/samples/2_Cookbook/0_MatrixTranspose/CMakeLists.txt @@ -40,5 +40,7 @@ set(CMAKE_BUILD_TYPE Release) # Create the excutable add_executable(MatrixTranspose MatrixTranspose.cpp) +target_include_directories(MatrixTranspose PRIVATE ../../common) + # Link with HIP target_link_libraries(MatrixTranspose hip::host) diff --git a/samples/2_Cookbook/0_MatrixTranspose/Makefile b/samples/2_Cookbook/0_MatrixTranspose/Makefile index 6d5b787510..308efa6dc6 100644 --- a/samples/2_Cookbook/0_MatrixTranspose/Makefile +++ b/samples/2_Cookbook/0_MatrixTranspose/Makefile @@ -30,6 +30,7 @@ HIPCC=$(HIP_PATH)/bin/hipcc TARGET=hcc +INCLUDES := -I../../common SOURCES = MatrixTranspose.cpp OBJECTS = $(SOURCES:.cpp=.o) @@ -40,7 +41,7 @@ EXECUTABLE=./MatrixTranspose all: $(EXECUTABLE) test -CXXFLAGS =-g +CXXFLAGS =-g $(INCLUDES) CXX=$(HIPCC) diff --git a/samples/2_Cookbook/0_MatrixTranspose/MatrixTranspose.cpp b/samples/2_Cookbook/0_MatrixTranspose/MatrixTranspose.cpp index 8444cff851..205f3b6966 100644 --- a/samples/2_Cookbook/0_MatrixTranspose/MatrixTranspose.cpp +++ b/samples/2_Cookbook/0_MatrixTranspose/MatrixTranspose.cpp @@ -24,6 +24,7 @@ THE SOFTWARE. // hip header file #include "hip/hip_runtime.h" +#include "hip_helper.h" #define WIDTH 1024 @@ -61,7 +62,7 @@ int main() { float* gpuTransposeMatrix; hipDeviceProp_t devProp; - hipGetDeviceProperties(&devProp, 0); + checkHipErrors(hipGetDeviceProperties(&devProp, 0)); std::cout << "Device name " << devProp.name << std::endl; @@ -78,11 +79,11 @@ int main() { } // allocate the memory on the device side - hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)); - hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)); + checkHipErrors(hipMalloc((void**)&gpuMatrix, NUM * sizeof(float))); + checkHipErrors(hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float))); // Memory transfer from host to device - hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice); + checkHipErrors(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice)); // Lauching kernel from host hipLaunchKernelGGL(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y), @@ -90,7 +91,7 @@ int main() { gpuMatrix, WIDTH); // Memory transfer from device to host - hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost); + checkHipErrors(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost)); // CPU MatrixTranspose computation matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH); @@ -110,8 +111,8 @@ int main() { } // free the resources on device side - hipFree(gpuMatrix); - hipFree(gpuTransposeMatrix); + checkHipErrors(hipFree(gpuMatrix)); + checkHipErrors(hipFree(gpuTransposeMatrix)); // free the resources on host side free(Matrix); diff --git a/samples/2_Cookbook/10_inline_asm/CMakeLists.txt b/samples/2_Cookbook/10_inline_asm/CMakeLists.txt index ac4e586772..f20827df3f 100644 --- a/samples/2_Cookbook/10_inline_asm/CMakeLists.txt +++ b/samples/2_Cookbook/10_inline_asm/CMakeLists.txt @@ -40,5 +40,7 @@ set(CMAKE_BUILD_TYPE Release) # Create the excutable add_executable(inline_asm inline_asm.cpp) +target_include_directories(inline_asm PRIVATE ../../common) + # Link with HIP target_link_libraries(inline_asm hip::host) diff --git a/samples/2_Cookbook/10_inline_asm/Makefile b/samples/2_Cookbook/10_inline_asm/Makefile index 58f013a2ba..31928eb2dc 100644 --- a/samples/2_Cookbook/10_inline_asm/Makefile +++ b/samples/2_Cookbook/10_inline_asm/Makefile @@ -32,7 +32,7 @@ TARGET=hcc SOURCES = inline_asm.cpp OBJECTS = $(SOURCES:.cpp=.o) - +INCLUDES := -I../../common EXECUTABLE=./inline_asm .PHONY: test @@ -40,7 +40,7 @@ EXECUTABLE=./inline_asm all: $(EXECUTABLE) test -CXXFLAGS =-g +CXXFLAGS =-g $(INCLUDES) CXX=$(HIPCC) diff --git a/samples/2_Cookbook/10_inline_asm/inline_asm.cpp b/samples/2_Cookbook/10_inline_asm/inline_asm.cpp index 8145b3c86e..39bc86f1b6 100644 --- a/samples/2_Cookbook/10_inline_asm/inline_asm.cpp +++ b/samples/2_Cookbook/10_inline_asm/inline_asm.cpp @@ -24,6 +24,7 @@ THE SOFTWARE. // hip header file #include "hip/hip_runtime.h" +#include "hip_helper.h" #define WIDTH 1024 @@ -59,13 +60,13 @@ int main() { float* gpuTransposeMatrix; hipDeviceProp_t devProp; - hipGetDeviceProperties(&devProp, 0); + checkHipErrors(hipGetDeviceProperties(&devProp, 0)); std::cout << "Device name " << devProp.name << std::endl; hipEvent_t start, stop; - hipEventCreate(&start); - hipEventCreate(&stop); + checkHipErrors(hipEventCreate(&start)); + checkHipErrors(hipEventCreate(&stop)); float eventMs = 1.0f; int i; @@ -81,25 +82,25 @@ int main() { } // allocate the memory on the device side - hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)); - hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)); + checkHipErrors(hipMalloc((void**)&gpuMatrix, NUM * sizeof(float))); + checkHipErrors(hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float))); // Record the start event - hipEventRecord(start, NULL); + checkHipErrors(hipEventRecord(start, NULL)); // Memory transfer from host to device - hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice); + checkHipErrors(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice)); // Record the stop event - hipEventRecord(stop, NULL); - hipEventSynchronize(stop); + checkHipErrors(hipEventRecord(stop, NULL)); + checkHipErrors(hipEventSynchronize(stop)); - hipEventElapsedTime(&eventMs, start, stop); + checkHipErrors(hipEventElapsedTime(&eventMs, start, stop)); printf("hipMemcpyHostToDevice time taken = %6.3fms\n", eventMs); // Record the start event - hipEventRecord(start, NULL); + checkHipErrors(hipEventRecord(start, NULL)); // Lauching kernel from host hipLaunchKernelGGL(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y), @@ -107,24 +108,24 @@ int main() { gpuMatrix, WIDTH); // Record the stop event - hipEventRecord(stop, NULL); - hipEventSynchronize(stop); + checkHipErrors(hipEventRecord(stop, NULL)); + checkHipErrors(hipEventSynchronize(stop)); - hipEventElapsedTime(&eventMs, start, stop); + checkHipErrors(hipEventElapsedTime(&eventMs, start, stop)); printf("kernel Execution time = %6.3fms\n", eventMs); // Record the start event - hipEventRecord(start, NULL); + checkHipErrors(hipEventRecord(start, NULL)); // Memory transfer from device to host - hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost); + checkHipErrors(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost)); // Record the stop event - hipEventRecord(stop, NULL); - hipEventSynchronize(stop); + checkHipErrors(hipEventRecord(stop, NULL)); + checkHipErrors(hipEventSynchronize(stop)); - hipEventElapsedTime(&eventMs, start, stop); + checkHipErrors(hipEventElapsedTime(&eventMs, start, stop)); printf("hipMemcpyDeviceToHost time taken = %6.3fms\n", eventMs); @@ -147,8 +148,8 @@ int main() { } // free the resources on device side - hipFree(gpuMatrix); - hipFree(gpuTransposeMatrix); + checkHipErrors(hipFree(gpuMatrix)); + checkHipErrors(hipFree(gpuTransposeMatrix)); // free the resources on host side free(Matrix); diff --git a/samples/2_Cookbook/11_texture_driver/CMakeLists.txt b/samples/2_Cookbook/11_texture_driver/CMakeLists.txt index f93b2e791e..55aa40b2a6 100644 --- a/samples/2_Cookbook/11_texture_driver/CMakeLists.txt +++ b/samples/2_Cookbook/11_texture_driver/CMakeLists.txt @@ -50,5 +50,7 @@ add_custom_target( add_dependencies(texture2dDrv codeobj) +target_include_directories(texture2dDrv PRIVATE ../../common) + # Link with HIP target_link_libraries(texture2dDrv hip::host) diff --git a/samples/2_Cookbook/11_texture_driver/Makefile b/samples/2_Cookbook/11_texture_driver/Makefile index f149005aa5..25737ea2e2 100644 --- a/samples/2_Cookbook/11_texture_driver/Makefile +++ b/samples/2_Cookbook/11_texture_driver/Makefile @@ -27,11 +27,12 @@ ifeq (,$(HIP_PATH)) endif HIPCC=$(HIP_PATH)/bin/hipcc HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --compiler) +INCLUDES := -I../../common all: tex2dKernel.code texture2dDrv.out texture2dDrv.out: texture2dDrv.cpp - $(HIPCC) $(HIPCC_FLAGS) $< -o $@ + $(HIPCC) $(HIPCC_FLAGS) $(INCLUDES) $< -o $@ tex2dKernel.code: tex2dKernel.cpp $(HIPCC) --genco $(GENCO_FLAGS) $^ -o $@ diff --git a/samples/2_Cookbook/11_texture_driver/texture2dDrv.cpp b/samples/2_Cookbook/11_texture_driver/texture2dDrv.cpp index f03b3873bf..e30c1ba911 100644 --- a/samples/2_Cookbook/11_texture_driver/texture2dDrv.cpp +++ b/samples/2_Cookbook/11_texture_driver/texture2dDrv.cpp @@ -24,21 +24,12 @@ THE SOFTWARE. #include #include #include +#include "hip_helper.h" #define fileName "tex2dKernel.code" bool testResult = true; -#define HIP_CHECK(cmd) \ - { \ - hipError_t status = cmd; \ - if (status != hipSuccess) { \ - std::cout << "error: #" << status << " (" << hipGetErrorString(status) \ - << ") at line:" << __LINE__ << ": " << #cmd << std::endl; \ - abort(); \ - } \ - } - template::value>::type *t = nullptr> static inline hipArray_Format getArrayFormat() { @@ -154,11 +145,11 @@ bool runTest(hipModule_t &module, const char *refName, const char *funcName) { hipChannelFormatDesc channelDesc = hipCreateChannelDesc(); hipArray_t array; - HIP_CHECK(hipMallocArray(&array, &channelDesc, width, height)); + checkHipErrors(hipMallocArray(&array, &channelDesc, width, height)); const size_t spitch = width * sizeof(T); - HIP_CHECK(hipMemcpy2DToArray(array, 0, 0, hData, spitch, width * sizeof(T), + checkHipErrors(hipMemcpy2DToArray(array, 0, 0, hData, spitch, width * sizeof(T), height, hipMemcpyHostToDevice)); hipResourceDesc resDesc; @@ -175,10 +166,10 @@ bool runTest(hipModule_t &module, const char *refName, const char *funcName) { texDesc.normalizedCoords = 0; hipTextureObject_t texObj; - HIP_CHECK(hipCreateTextureObject(&texObj, &resDesc, &texDesc, nullptr)); + checkHipErrors(hipCreateTextureObject(&texObj, &resDesc, &texDesc, nullptr)); T *dData = NULL; - HIP_CHECK(hipMalloc((void** )&dData, size)); + checkHipErrors(hipMalloc((void** )&dData, size)); struct { void *_Ad; @@ -197,18 +188,18 @@ bool runTest(hipModule_t &module, const char *refName, const char *funcName) { HIP_LAUNCH_PARAM_BUFFER_SIZE, &sizeTemp, HIP_LAUNCH_PARAM_END }; hipFunction_t Function; - HIP_CHECK(hipModuleGetFunction(&Function, module, funcName)); + checkHipErrors(hipModuleGetFunction(&Function, module, funcName)); int temp1 = width / 16; int temp2 = height / 16; - HIP_CHECK( + checkHipErrors( hipModuleLaunchKernel(Function, 16, 16, 1, temp1, temp2, 1, 0, 0, NULL, (void** )&config)); - HIP_CHECK(hipDeviceSynchronize()); + checkHipErrors(hipDeviceSynchronize()); T *hOutputData = (T*) malloc(size); memset(hOutputData, 0, size); - HIP_CHECK(hipMemcpy(hOutputData, dData, size, hipMemcpyDeviceToHost)); + checkHipErrors(hipMemcpy(hOutputData, dData, size, hipMemcpyDeviceToHost)); for (int i = 0; i < height; i++) { for (int j = 0; j < width; j++) { @@ -219,9 +210,9 @@ bool runTest(hipModule_t &module, const char *refName, const char *funcName) { } } } - HIP_CHECK(hipDestroyTextureObject(texObj)); - HIP_CHECK(hipFree(dData)); - HIP_CHECK(hipFreeArray(array)); + checkHipErrors(hipDestroyTextureObject(texObj)); + checkHipErrors(hipFree(dData)); + checkHipErrors(hipFreeArray(array)); free(hOutputData); free(hData); printf("%s test %s ...\n", funcName, testResult ? "PASSED" : "FAILED"); @@ -231,7 +222,7 @@ bool runTest(hipModule_t &module, const char *refName, const char *funcName) { inline bool isImageSupported() { int imageSupport = 1; #ifdef __HIP_PLATFORM_AMD__ - HIP_CHECK(hipDeviceGetAttribute(&imageSupport, hipDeviceAttributeImageSupport, + checkHipErrors(hipDeviceGetAttribute(&imageSupport, hipDeviceAttributeImageSupport, 0)); #endif return imageSupport != 0; @@ -242,10 +233,10 @@ int main(int argc, char** argv) { printf("Texture is not support on the device. Skipped.\n"); return 0; } - HIP_CHECK(hipInit(0)); - HIP_CHECK(hipSetDevice(0)); + checkHipErrors(hipInit(0)); + checkHipErrors(hipSetDevice(0)); hipModule_t module; - HIP_CHECK(hipModuleLoad(&module, fileName)); + checkHipErrors(hipModuleLoad(&module, fileName)); testResult = testResult && runTest(module, "texChar", "tex2dKernelChar"); testResult = testResult && runTest(module, "texShort", "tex2dKernelShort"); testResult = testResult && runTest(module, "texInt", "tex2dKernelInt"); @@ -255,7 +246,7 @@ int main(int argc, char** argv) { testResult = testResult && runTest(module, "texInt4", "tex2dKernelInt4"); testResult = testResult && runTest(module, "texFloat4", "tex2dKernelFloat4"); - HIP_CHECK(hipModuleUnload(module)); + checkHipErrors(hipModuleUnload(module)); printf("texture2dDrv %s ...\n", testResult ? "PASSED" : "FAILED"); return testResult ? EXIT_SUCCESS : EXIT_FAILURE; } diff --git a/samples/2_Cookbook/12_cmake_hip_add_executable/CMakeLists.txt b/samples/2_Cookbook/12_cmake_hip_add_executable/CMakeLists.txt index 7045a891e1..9bc99e36ae 100644 --- a/samples/2_Cookbook/12_cmake_hip_add_executable/CMakeLists.txt +++ b/samples/2_Cookbook/12_cmake_hip_add_executable/CMakeLists.txt @@ -51,6 +51,8 @@ set(MY_NVCC_OPTIONS) set_source_files_properties(${MY_SOURCE_FILES} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) hip_add_executable(${MY_TARGET_NAME} ${MY_SOURCE_FILES} HIPCC_OPTIONS ${MY_HIPCC_OPTIONS} CLANG_OPTIONS ${MY_CLANG_OPTIONS} NVCC_OPTIONS ${MY_NVCC_OPTIONS}) +target_include_directories(${MY_TARGET_NAME} PRIVATE ../../common) + # Search for rocm in common locations list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}/hip ${ROCM_PATH}) find_package(hip QUIET CONFIG) diff --git a/samples/2_Cookbook/12_cmake_hip_add_executable/MatrixTranspose.cpp b/samples/2_Cookbook/12_cmake_hip_add_executable/MatrixTranspose.cpp index 8444cff851..205f3b6966 100644 --- a/samples/2_Cookbook/12_cmake_hip_add_executable/MatrixTranspose.cpp +++ b/samples/2_Cookbook/12_cmake_hip_add_executable/MatrixTranspose.cpp @@ -24,6 +24,7 @@ THE SOFTWARE. // hip header file #include "hip/hip_runtime.h" +#include "hip_helper.h" #define WIDTH 1024 @@ -61,7 +62,7 @@ int main() { float* gpuTransposeMatrix; hipDeviceProp_t devProp; - hipGetDeviceProperties(&devProp, 0); + checkHipErrors(hipGetDeviceProperties(&devProp, 0)); std::cout << "Device name " << devProp.name << std::endl; @@ -78,11 +79,11 @@ int main() { } // allocate the memory on the device side - hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)); - hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)); + checkHipErrors(hipMalloc((void**)&gpuMatrix, NUM * sizeof(float))); + checkHipErrors(hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float))); // Memory transfer from host to device - hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice); + checkHipErrors(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice)); // Lauching kernel from host hipLaunchKernelGGL(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y), @@ -90,7 +91,7 @@ int main() { gpuMatrix, WIDTH); // Memory transfer from device to host - hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost); + checkHipErrors(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost)); // CPU MatrixTranspose computation matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH); @@ -110,8 +111,8 @@ int main() { } // free the resources on device side - hipFree(gpuMatrix); - hipFree(gpuTransposeMatrix); + checkHipErrors(hipFree(gpuMatrix)); + checkHipErrors(hipFree(gpuTransposeMatrix)); // free the resources on host side free(Matrix); diff --git a/samples/2_Cookbook/13_occupancy/CMakeLists.txt b/samples/2_Cookbook/13_occupancy/CMakeLists.txt index 481cf58b26..44d0303068 100644 --- a/samples/2_Cookbook/13_occupancy/CMakeLists.txt +++ b/samples/2_Cookbook/13_occupancy/CMakeLists.txt @@ -40,5 +40,7 @@ set(CMAKE_BUILD_TYPE Release) # Create the excutable add_executable(occupancy occupancy.cpp) +target_include_directories(occupancy PRIVATE ../../common) + # Link with HIP target_link_libraries(occupancy hip::host) diff --git a/samples/2_Cookbook/13_occupancy/Makefile b/samples/2_Cookbook/13_occupancy/Makefile index 73f0753afb..dd4037418f 100644 --- a/samples/2_Cookbook/13_occupancy/Makefile +++ b/samples/2_Cookbook/13_occupancy/Makefile @@ -26,7 +26,7 @@ ifeq (,$(HIP_PATH)) HIP_PATH=../../.. endif HIPCC=$(HIP_PATH)/bin/hipcc - +INCLUDES := -I../../common EXE=./occupancy .PHONY: test @@ -34,7 +34,7 @@ EXE=./occupancy all: test $(EXE): occupancy.cpp - $(HIPCC) $^ -o $@ + $(HIPCC) $(INCLUDES) $^ -o $@ test: $(EXE) $(EXE) diff --git a/samples/2_Cookbook/13_occupancy/occupancy.cpp b/samples/2_Cookbook/13_occupancy/occupancy.cpp index 4f51b61c6e..497b670f04 100644 --- a/samples/2_Cookbook/13_occupancy/occupancy.cpp +++ b/samples/2_Cookbook/13_occupancy/occupancy.cpp @@ -19,14 +19,9 @@ THE SOFTWARE. #include "hip/hip_runtime.h" #include +#include "hip_helper.h" #define NUM 1000000 -#define HIP_CHECK(status) \ - if (status != hipSuccess) { \ - std::cout << "Got Status: " << status << " at Line: " << __LINE__ << std::endl; \ - exit(0); \ - } - // Device (Kernel) function __global__ void multiply(float* C, float* A, float* B, int N){ @@ -47,11 +42,11 @@ void multiplyCPU(float* C, float* A, float* B, int N){ void launchKernel(float* C, float* A, float* B, bool manual){ hipDeviceProp_t devProp; - HIP_CHECK(hipGetDeviceProperties(&devProp, 0)); + checkHipErrors(hipGetDeviceProperties(&devProp, 0)); hipEvent_t start, stop; - HIP_CHECK(hipEventCreate(&start)); - HIP_CHECK(hipEventCreate(&stop)); + checkHipErrors(hipEventCreate(&start)); + checkHipErrors(hipEventCreate(&stop)); float eventMs = 1.0f; const unsigned threadsperblock = 32; const unsigned blocks = (NUM/threadsperblock)+1; @@ -66,28 +61,28 @@ void launchKernel(float* C, float* A, float* B, bool manual){ std::cout << std::endl << "Manual Configuration with block size " << blockSize << std::endl; } else{ - HIP_CHECK(hipOccupancyMaxPotentialBlockSize(&mingridSize, &blockSize, multiply, 0, 0)); + checkHipErrors(hipOccupancyMaxPotentialBlockSize(&mingridSize, &blockSize, multiply, 0, 0)); std::cout << std::endl << "Automatic Configuation based on hipOccupancyMaxPotentialBlockSize " << std::endl; std::cout << "Suggested blocksize is " << blockSize << ", Minimum gridsize is " << mingridSize << std::endl; gridSize = (NUM/blockSize)+1; } // Record the start event - HIP_CHECK(hipEventRecord(start, NULL)); + checkHipErrors(hipEventRecord(start, NULL)); // Launching the Kernel from Host hipLaunchKernelGGL(multiply, dim3(gridSize), dim3(blockSize), 0, 0, C, A, B, NUM); // Record the stop event - HIP_CHECK(hipEventRecord(stop, NULL)); - HIP_CHECK(hipEventSynchronize(stop)); + checkHipErrors(hipEventRecord(stop, NULL)); + checkHipErrors(hipEventSynchronize(stop)); - HIP_CHECK(hipEventElapsedTime(&eventMs, start, stop)); + checkHipErrors(hipEventElapsedTime(&eventMs, start, stop)); printf("kernel Execution time = %6.3fms\n", eventMs); //Calculate Occupancy int numBlock = 0; - HIP_CHECK(hipOccupancyMaxActiveBlocksPerMultiprocessor(&numBlock, multiply, blockSize, 0)); + checkHipErrors(hipOccupancyMaxActiveBlocksPerMultiprocessor(&numBlock, multiply, blockSize, 0)); if(devProp.maxThreadsPerMultiProcessor){ std::cout << "Theoretical Occupancy is " << (double)numBlock* blockSize/devProp.maxThreadsPerMultiProcessor * 100 << "%" << std::endl; @@ -113,14 +108,14 @@ int main() { } // allocate the memory on the device side - HIP_CHECK(hipMalloc((void**)&Ad, NUM * sizeof(float))); - HIP_CHECK(hipMalloc((void**)&Bd, NUM * sizeof(float))); - HIP_CHECK(hipMalloc((void**)&C0d, NUM * sizeof(float))); - HIP_CHECK(hipMalloc((void**)&C1d, NUM * sizeof(float))); + checkHipErrors(hipMalloc((void**)&Ad, NUM * sizeof(float))); + checkHipErrors(hipMalloc((void**)&Bd, NUM * sizeof(float))); + checkHipErrors(hipMalloc((void**)&C0d, NUM * sizeof(float))); + checkHipErrors(hipMalloc((void**)&C1d, NUM * sizeof(float))); // Memory transfer from host to device - HIP_CHECK(hipMemcpy(Ad,A,NUM * sizeof(float), hipMemcpyHostToDevice)); - HIP_CHECK(hipMemcpy(Bd,B,NUM * sizeof(float), hipMemcpyHostToDevice)); + checkHipErrors(hipMemcpy(Ad,A,NUM * sizeof(float), hipMemcpyHostToDevice)); + checkHipErrors(hipMemcpy(Bd,B,NUM * sizeof(float), hipMemcpyHostToDevice)); //Kernel launch with manual/default block size launchKernel(C0d, Ad, Bd, 1); @@ -129,8 +124,8 @@ int main() { launchKernel(C1d, Ad, Bd, 0); // Memory transfer from device to host - HIP_CHECK(hipMemcpy(C0,C0d, NUM * sizeof(float), hipMemcpyDeviceToHost)); - HIP_CHECK(hipMemcpy(C1,C1d, NUM * sizeof(float), hipMemcpyDeviceToHost)); + checkHipErrors(hipMemcpy(C0,C0d, NUM * sizeof(float), hipMemcpyDeviceToHost)); + checkHipErrors(hipMemcpy(C1,C1d, NUM * sizeof(float), hipMemcpyDeviceToHost)); // CPU computation multiplyCPU(cpuC, A, B, NUM); @@ -163,10 +158,10 @@ int main() { printf("\nAutomatic Test PASSED!\n"); } - HIP_CHECK(hipFree(Ad)); - HIP_CHECK(hipFree(Bd)); - HIP_CHECK(hipFree(C0d)); - HIP_CHECK(hipFree(C1d)); + checkHipErrors(hipFree(Ad)); + checkHipErrors(hipFree(Bd)); + checkHipErrors(hipFree(C0d)); + checkHipErrors(hipFree(C1d)); free(A); free(B); diff --git a/samples/2_Cookbook/14_gpu_arch/CMakeLists.txt b/samples/2_Cookbook/14_gpu_arch/CMakeLists.txt index 084f273096..91354b11cc 100644 --- a/samples/2_Cookbook/14_gpu_arch/CMakeLists.txt +++ b/samples/2_Cookbook/14_gpu_arch/CMakeLists.txt @@ -40,5 +40,7 @@ set(CMAKE_BUILD_TYPE Release) # Create the excutable add_executable(gpuarch gpuarch.cpp) +target_include_directories(gpuarch PRIVATE ../../common) + # Link with HIP target_link_libraries(gpuarch hip::host) diff --git a/samples/2_Cookbook/14_gpu_arch/Makefile b/samples/2_Cookbook/14_gpu_arch/Makefile index c730c10a06..a3b1115780 100644 --- a/samples/2_Cookbook/14_gpu_arch/Makefile +++ b/samples/2_Cookbook/14_gpu_arch/Makefile @@ -26,7 +26,7 @@ ifeq (,$(HIP_PATH)) HIP_PATH=../../.. endif HIPCC=$(HIP_PATH)/bin/hipcc - +INCLUDES := -I../../common EXE=./gpuarch .PHONY: test @@ -34,7 +34,7 @@ EXE=./gpuarch all: test $(EXE): gpuarch.cpp - $(HIPCC) $^ -o $@ + $(HIPCC) $(INCLUDES) $^ -o $@ test: $(EXE) $(EXE) diff --git a/samples/2_Cookbook/14_gpu_arch/gpuarch.cpp b/samples/2_Cookbook/14_gpu_arch/gpuarch.cpp index f1b521fcd1..35cd93a2e9 100644 --- a/samples/2_Cookbook/14_gpu_arch/gpuarch.cpp +++ b/samples/2_Cookbook/14_gpu_arch/gpuarch.cpp @@ -25,12 +25,6 @@ THE SOFTWARE. #define SIZE (BLOCKS_PER_GRID * THREADS_PER_BLOCK) #define NOT_SUPPORTED -99 // dummy number indicates unsupported operation -#define HIP_STATUS_CHECK(status) \ - if (status != hipSuccess) { \ - std::cout << "Got Status: " << status << " at Line: " << __LINE__ << std::endl; \ - exit(0); \ - } - // Using __gfx*__ macro one can have GPU architecture specific code flow // For example: If below kernel runs on gfx908 it will increment 'in' by 'value' and store into // 'out' @@ -57,8 +51,8 @@ int main() { int32_t* hInput = static_cast(malloc(NBytes)); int32_t* hOutput = static_cast(malloc(NBytes)); - HIP_STATUS_CHECK(hipMalloc(&dInput, NBytes)); - HIP_STATUS_CHECK(hipMalloc(&dOutput, NBytes)); + checkHipErrors(hipMalloc(&dInput, NBytes)); + checkHipErrors(hipMalloc(&dOutput, NBytes)); // Initialize host input/output buffers for (int i = 0; i < SIZE; ++i) { @@ -67,14 +61,14 @@ int main() { } // Initialize device input buffer - HIP_STATUS_CHECK(hipMemcpy(dInput, hInput, NBytes, hipMemcpyHostToDevice)); + checkHipErrors(hipMemcpy(dInput, hInput, NBytes, hipMemcpyHostToDevice)); // Launch kernel hipLaunchKernelGGL(incrementKernel, dim3(BLOCKS_PER_GRID), dim3(THREADS_PER_BLOCK), 0, 0, dInput, dOutput, incrementValue, SIZE); // Copy result back to host buffer - HIP_STATUS_CHECK(hipMemcpy(hOutput, dOutput, NBytes, hipMemcpyDeviceToHost)); + checkHipErrors(hipMemcpy(hOutput, dOutput, NBytes, hipMemcpyDeviceToHost)); bool flag = true; // verify data diff --git a/samples/2_Cookbook/16_assembly_to_executable/Makefile b/samples/2_Cookbook/16_assembly_to_executable/Makefile index 56917be6c3..e0615b84dd 100644 --- a/samples/2_Cookbook/16_assembly_to_executable/Makefile +++ b/samples/2_Cookbook/16_assembly_to_executable/Makefile @@ -30,6 +30,7 @@ HIPCC=$(HIP_PATH)/bin/hipcc CLANG=$(HIP_PATH)/llvm/bin/clang LLVM_MC=$(HIP_PATH)/llvm/bin/llvm-mc CLANG_OFFLOAD_BUNDLER=$(HIP_PATH)/llvm/bin/clang-offload-bundler +INCLUDES := -I../../common SRCS=square.cpp @@ -57,8 +58,8 @@ GPU_ARCH9=gfx1103 all: src_to_asm asm_to_exec src_to_asm: - $(HIPCC) -c -S --cuda-host-only -target x86_64-linux-gnu -o $(SQ_HOST_ASM) $(SRCS) - $(HIPCC) -c -S --cuda-device-only --offload-arch=$(GPU_ARCH1) --offload-arch=$(GPU_ARCH2) --offload-arch=$(GPU_ARCH3) --offload-arch=$(GPU_ARCH4) --offload-arch=$(GPU_ARCH5) --offload-arch=$(GPU_ARCH6) --offload-arch=$(GPU_ARCH7) --offload-arch=$(GPU_ARCH8) --offload-arch=$(GPU_ARCH9) $(SRCS) + $(HIPCC) -c -S $(INCLUDES) --cuda-host-only -target x86_64-linux-gnu -o $(SQ_HOST_ASM) $(SRCS) + $(HIPCC) -c -S $(INCLUDES) --cuda-device-only --offload-arch=$(GPU_ARCH1) --offload-arch=$(GPU_ARCH2) --offload-arch=$(GPU_ARCH3) --offload-arch=$(GPU_ARCH4) --offload-arch=$(GPU_ARCH5) --offload-arch=$(GPU_ARCH6) --offload-arch=$(GPU_ARCH7) --offload-arch=$(GPU_ARCH8) --offload-arch=$(GPU_ARCH9) $(SRCS) # You may modify the .s assembly files before the next step # By default, their names will be: diff --git a/samples/2_Cookbook/16_assembly_to_executable/square.cpp b/samples/2_Cookbook/16_assembly_to_executable/square.cpp index a04bf625ca..296be3cbbe 100644 --- a/samples/2_Cookbook/16_assembly_to_executable/square.cpp +++ b/samples/2_Cookbook/16_assembly_to_executable/square.cpp @@ -19,15 +19,7 @@ THE SOFTWARE. #include #include - -#define CHECK(cmd) \ -{\ - hipError_t error = cmd;\ - if (error != hipSuccess) { \ - fprintf(stderr, "error: '%s'(%d) at %s:%d\n", hipGetErrorString(error), error,__FILE__, __LINE__); \ - exit(EXIT_FAILURE);\ - }\ -} +#include "hip_helper.h" /* This kernel is a placeholder for the kernel in assembly generated by this * sample. It will be replaced by the kernel in assembly. @@ -55,14 +47,14 @@ int main(int argc, char *argv[]) size_t Nbytes = N * sizeof(float); hipDeviceProp_t props; - CHECK(hipGetDeviceProperties(&props, 0/*deviceID*/)); + checkHipErrors(hipGetDeviceProperties(&props, 0/*deviceID*/)); printf ("info: running on device %s\n", props.name); printf ("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0); A_h = (float*)malloc(Nbytes); - CHECK(A_h == 0 ? hipErrorMemoryAllocation : hipSuccess ); + checkHipErrors(A_h == 0 ? hipErrorMemoryAllocation : hipSuccess ); C_h = (float*)malloc(Nbytes); - CHECK(C_h == 0 ? hipErrorMemoryAllocation : hipSuccess ); + checkHipErrors(C_h == 0 ? hipErrorMemoryAllocation : hipSuccess ); // Fill with Phi + i for (size_t i=0; i>> (C_d, A_d, N); printf ("info: copy Device2Host\n"); - CHECK ( hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); + checkHipErrors ( hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); - printf ("info: check result\n"); + printf ("info: checkHipErrors result\n"); for (size_t i=0; i #include - -#define CHECK(cmd) \ -{\ - hipError_t error = cmd;\ - if (error != hipSuccess) { \ - fprintf(stderr, "error: '%s'(%d) at %s:%d\n", hipGetErrorString(error), error,__FILE__, __LINE__); \ - exit(EXIT_FAILURE);\ - }\ -} +#include "hip_helper.h" /* This kernel is a placeholder for the kernel in LLVM IR generated by this * sample. It will be replaced by the kernel in LLVM IR. @@ -55,14 +47,14 @@ int main(int argc, char *argv[]) size_t Nbytes = N * sizeof(float); hipDeviceProp_t props; - CHECK(hipGetDeviceProperties(&props, 0/*deviceID*/)); + checkHipErrors(hipGetDeviceProperties(&props, 0/*deviceID*/)); printf ("info: running on device %s\n", props.name); printf ("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0); A_h = (float*)malloc(Nbytes); - CHECK(A_h == 0 ? hipErrorMemoryAllocation : hipSuccess ); + checkHipErrors(A_h == 0 ? hipErrorMemoryAllocation : hipSuccess ); C_h = (float*)malloc(Nbytes); - CHECK(C_h == 0 ? hipErrorMemoryAllocation : hipSuccess ); + checkHipErrors(C_h == 0 ? hipErrorMemoryAllocation : hipSuccess ); // Fill with Phi + i for (size_t i=0; i>> (C_d, A_d, N); printf ("info: copy Device2Host\n"); - CHECK ( hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); + checkHipErrors ( hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); - printf ("info: check result\n"); + printf ("info: checkHipErrors result\n"); for (size_t i=0; i #include - -#define CHECK(cmd) \ -{\ - hipError_t error = cmd;\ - if (error != hipSuccess) { \ - fprintf(stderr, "error: '%s'(%d) at %s:%d\n", hipGetErrorString(error), error,__FILE__, __LINE__); \ - exit(EXIT_FAILURE);\ - }\ -} - +#include "hip_helper.h" /* * Square each element in the array A and write to array C. @@ -57,14 +48,14 @@ int main(int argc, char *argv[]) size_t Nbytes = N * sizeof(float); hipDeviceProp_t props; - CHECK(hipGetDeviceProperties(&props, 0/*deviceID*/)); + checkHipErrors(hipGetDeviceProperties(&props, 0/*deviceID*/)); printf ("info: running on device %s\n", props.name); printf ("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0); A_h = (float*)malloc(Nbytes); - CHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess ); + checkHipErrors(A_h == 0 ? hipErrorOutOfMemory : hipSuccess ); C_h = (float*)malloc(Nbytes); - CHECK(C_h == 0 ? hipErrorOutOfMemory : hipSuccess ); + checkHipErrors(C_h == 0 ? hipErrorOutOfMemory : hipSuccess ); // Fill with Phi + i for (size_t i=0; i #include - -#define CHECK(cmd) \ -{\ - hipError_t error = cmd;\ - if (error != hipSuccess) { \ - fprintf(stderr, "error: '%s'(%d) at %s:%d\n", hipGetErrorString(error), error,__FILE__, __LINE__); \ - exit(EXIT_FAILURE);\ - }\ -} - +#include "hip_helper.h" /* * Square each element in the array A and write to array C. @@ -57,14 +48,14 @@ int main(int argc, char *argv[]) size_t Nbytes = N * sizeof(float); hipDeviceProp_t props; - CHECK(hipGetDeviceProperties(&props, 0/*deviceID*/)); + checkHipErrors(hipGetDeviceProperties(&props, 0/*deviceID*/)); printf ("info: running on device %s\n", props.name); printf ("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0); A_h = (float*)malloc(Nbytes); - CHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess ); + checkHipErrors(A_h == 0 ? hipErrorOutOfMemory : hipSuccess ); C_h = (float*)malloc(Nbytes); - CHECK(C_h == 0 ? hipErrorOutOfMemory : hipSuccess ); + checkHipErrors(C_h == 0 ? hipErrorOutOfMemory : hipSuccess ); // Fill with Phi + i for (size_t i=0; i #include - -#define CHECK(cmd) \ -{\ - hipError_t error = cmd;\ - if (error != hipSuccess) { \ - fprintf(stderr, "error: '%s'(%d) at %s:%d\n", hipGetErrorString(error), error,__FILE__, __LINE__); \ - exit(EXIT_FAILURE);\ - }\ -} - +#include "hip_helper.h" /* * Square each element in the array A and write to array C. @@ -57,14 +48,14 @@ int main(int argc, char *argv[]) size_t Nbytes = N * sizeof(float); hipDeviceProp_t props; - CHECK(hipGetDeviceProperties(&props, 0/*deviceID*/)); + checkHipErrors(hipGetDeviceProperties(&props, 0/*deviceID*/)); printf ("info: running on device %s\n", props.name); printf ("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0); A_h = (float*)malloc(Nbytes); - CHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess ); + checkHipErrors(A_h == 0 ? hipErrorOutOfMemory : hipSuccess ); C_h = (float*)malloc(Nbytes); - CHECK(C_h == 0 ? hipErrorOutOfMemory : hipSuccess ); + checkHipErrors(C_h == 0 ? hipErrorOutOfMemory : hipSuccess ); // Fill with Phi + i for (size_t i=0; i #include +#include #include #include @@ -69,7 +70,7 @@ int main() hipDeviceProp_t props; int device = 0; - hipGetDeviceProperties(&props, device); + checkHipErrors(hipGetDeviceProperties(&props, device)); const char* options[] = {}; @@ -100,8 +101,8 @@ int main() hipModule_t module; hipFunction_t kernel; - hipModuleLoadData(&module, code.data()); - hipModuleGetFunction(&kernel, module, "saxpy"); + checkHipErrors(hipModuleLoadData(&module, code.data())); + checkHipErrors(hipModuleGetFunction(&kernel, module, "saxpy")); size_t n = NUM_THREADS * NUM_BLOCKS; size_t bufferSize = n * sizeof(float); @@ -117,11 +118,11 @@ int main() } hipDeviceptr_t dX, dY, dOut; - hipMalloc((void **)&dX, bufferSize); - hipMalloc((void **)&dY, bufferSize); - hipMalloc((void **)&dOut, bufferSize); - hipMemcpyHtoD(dX, hX.get(), bufferSize); - hipMemcpyHtoD(dY, hY.get(), bufferSize); + checkHipErrors(hipMalloc((void **)&dX, bufferSize)); + checkHipErrors(hipMalloc((void **)&dY, bufferSize)); + checkHipErrors(hipMalloc((void **)&dOut, bufferSize)); + checkHipErrors(hipMemcpyHtoD(dX, hX.get(), bufferSize)); + checkHipErrors(hipMemcpyHtoD(dY, hY.get(), bufferSize)); struct { float a_; @@ -136,9 +137,9 @@ int main() HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, HIP_LAUNCH_PARAM_END}; - hipModuleLaunchKernel(kernel, NUM_BLOCKS, 1, 1, NUM_THREADS, 1, 1, - 0, nullptr, nullptr, config); - hipMemcpyDtoH(hOut.get(), dOut, bufferSize); + checkHipErrors(hipModuleLaunchKernel(kernel, NUM_BLOCKS, 1, 1, NUM_THREADS, 1, 1, + 0, nullptr, nullptr, config)); + checkHipErrors(hipMemcpyDtoH(hOut.get(), dOut, bufferSize)); for (size_t i = 0; i < n; ++i) { if (fabs(a * hX[i] + hY[i] - hOut[i]) > fabs(hOut[i])* 1e-6) { @@ -146,11 +147,11 @@ int main() } } - hipFree((void *)dX); - hipFree((void *)dY); - hipFree((void *)dOut); + checkHipErrors(hipFree((void *)dX)); + checkHipErrors(hipFree((void *)dY)); + checkHipErrors(hipFree((void *)dOut)); - hipModuleUnload(module); + checkHipErrors(hipModuleUnload(module)); cout << "SAXPY test completed" << endl; } diff --git a/samples/2_Cookbook/3_shared_memory/CMakeLists.txt b/samples/2_Cookbook/3_shared_memory/CMakeLists.txt index f2f5196373..bab322511d 100644 --- a/samples/2_Cookbook/3_shared_memory/CMakeLists.txt +++ b/samples/2_Cookbook/3_shared_memory/CMakeLists.txt @@ -40,5 +40,7 @@ set(CMAKE_BUILD_TYPE Release) # Create the excutable add_executable(sharedMemory sharedMemory.cpp) +target_include_directories(sharedMemory PRIVATE ../../common) + # Link with HIP target_link_libraries(sharedMemory hip::host) diff --git a/samples/2_Cookbook/3_shared_memory/Makefile b/samples/2_Cookbook/3_shared_memory/Makefile index bbd7daace3..c8571cf21f 100644 --- a/samples/2_Cookbook/3_shared_memory/Makefile +++ b/samples/2_Cookbook/3_shared_memory/Makefile @@ -32,6 +32,7 @@ TARGET=hcc SOURCES = sharedMemory.cpp OBJECTS = $(SOURCES:.cpp=.o) +INCLUDES := -I../../common EXECUTABLE=./sharedMemory @@ -40,7 +41,7 @@ EXECUTABLE=./sharedMemory all: $(EXECUTABLE) test -CXXFLAGS =-g +CXXFLAGS =-g $(INCLUDES) CXX=$(HIPCC) diff --git a/samples/2_Cookbook/3_shared_memory/sharedMemory.cpp b/samples/2_Cookbook/3_shared_memory/sharedMemory.cpp index 8bd489dbf3..b648288a8f 100644 --- a/samples/2_Cookbook/3_shared_memory/sharedMemory.cpp +++ b/samples/2_Cookbook/3_shared_memory/sharedMemory.cpp @@ -24,7 +24,7 @@ THE SOFTWARE. // hip header file #include "hip/hip_runtime.h" - +#include "hip_helper.h" #define WIDTH 64 @@ -66,7 +66,7 @@ int main() { float* gpuTransposeMatrix; hipDeviceProp_t devProp; - hipGetDeviceProperties(&devProp, 0); + checkHipErrors(hipGetDeviceProperties(&devProp, 0)); std::cout << "Device name " << devProp.name << std::endl; @@ -83,11 +83,11 @@ int main() { } // allocate the memory on the device side - hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)); - hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)); + checkHipErrors(hipMalloc((void**)&gpuMatrix, NUM * sizeof(float))); + checkHipErrors(hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float))); // Memory transfer from host to device - hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice); + checkHipErrors(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice)); // Lauching kernel from host hipLaunchKernelGGL(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y), @@ -95,7 +95,7 @@ int main() { gpuMatrix, WIDTH); // Memory transfer from device to host - hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost); + checkHipErrors(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost)); // CPU MatrixTranspose computation matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH); @@ -116,8 +116,8 @@ int main() { } // free the resources on device side - hipFree(gpuMatrix); - hipFree(gpuTransposeMatrix); + checkHipErrors(hipFree(gpuMatrix)); + checkHipErrors(hipFree(gpuTransposeMatrix)); // free the resources on host side free(Matrix); diff --git a/samples/2_Cookbook/4_shfl/CMakeLists.txt b/samples/2_Cookbook/4_shfl/CMakeLists.txt index 394d5852fb..d1e8a4843f 100644 --- a/samples/2_Cookbook/4_shfl/CMakeLists.txt +++ b/samples/2_Cookbook/4_shfl/CMakeLists.txt @@ -40,5 +40,7 @@ set(CMAKE_BUILD_TYPE Release) # Create the excutable add_executable(shfl shfl.cpp) +target_include_directories(shfl PRIVATE ../../common) + # Link with HIP target_link_libraries(shfl hip::host) diff --git a/samples/2_Cookbook/4_shfl/Makefile b/samples/2_Cookbook/4_shfl/Makefile index de94a3e546..6305ad10e0 100644 --- a/samples/2_Cookbook/4_shfl/Makefile +++ b/samples/2_Cookbook/4_shfl/Makefile @@ -36,6 +36,7 @@ TARGET=hcc SOURCES = shfl.cpp OBJECTS = $(SOURCES:.cpp=.o) +INCLUDES := -I../../common EXECUTABLE=./shfl @@ -44,7 +45,7 @@ EXECUTABLE=./shfl all: $(EXECUTABLE) test -CXXFLAGS =-g +CXXFLAGS =-g $(INCLUDES) CXX=$(HIPCC) diff --git a/samples/2_Cookbook/4_shfl/shfl.cpp b/samples/2_Cookbook/4_shfl/shfl.cpp index de1ff7a950..6ef968bab3 100644 --- a/samples/2_Cookbook/4_shfl/shfl.cpp +++ b/samples/2_Cookbook/4_shfl/shfl.cpp @@ -24,7 +24,7 @@ THE SOFTWARE. // hip header file #include "hip/hip_runtime.h" - +#include "hip_helper.h" #define WIDTH 4 @@ -63,7 +63,7 @@ int main() { float* gpuTransposeMatrix; hipDeviceProp_t devProp; - hipGetDeviceProperties(&devProp, 0); + checkHipErrors(hipGetDeviceProperties(&devProp, 0)); std::cout << "Device name " << devProp.name << std::endl; @@ -80,18 +80,18 @@ int main() { } // allocate the memory on the device side - hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)); - hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)); + checkHipErrors(hipMalloc((void**)&gpuMatrix, NUM * sizeof(float))); + checkHipErrors(hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float))); // Memory transfer from host to device - hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice); + checkHipErrors(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice)); // Lauching kernel from host hipLaunchKernelGGL(matrixTranspose, dim3(1), dim3(THREADS_PER_BLOCK_X * THREADS_PER_BLOCK_Y), 0, 0, gpuTransposeMatrix, gpuMatrix, WIDTH); // Memory transfer from device to host - hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost); + checkHipErrors(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost)); // CPU MatrixTranspose computation matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH); @@ -112,8 +112,8 @@ int main() { } // free the resources on device side - hipFree(gpuMatrix); - hipFree(gpuTransposeMatrix); + checkHipErrors(hipFree(gpuMatrix)); + checkHipErrors(hipFree(gpuTransposeMatrix)); // free the resources on host side free(Matrix); diff --git a/samples/2_Cookbook/5_2dshfl/2dshfl.cpp b/samples/2_Cookbook/5_2dshfl/2dshfl.cpp index 269ad58383..d65af3ace2 100644 --- a/samples/2_Cookbook/5_2dshfl/2dshfl.cpp +++ b/samples/2_Cookbook/5_2dshfl/2dshfl.cpp @@ -24,6 +24,7 @@ THE SOFTWARE. // hip header file #include "hip/hip_runtime.h" +#include "hip_helper.h" #define WIDTH 4 @@ -61,7 +62,7 @@ int main() { float* gpuTransposeMatrix; hipDeviceProp_t devProp; - hipGetDeviceProperties(&devProp, 0); + checkHipErrors(hipGetDeviceProperties(&devProp, 0)); std::cout << "Device name " << devProp.name << std::endl; @@ -78,18 +79,18 @@ int main() { } // allocate the memory on the device side - hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)); - hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)); + checkHipErrors(hipMalloc((void**)&gpuMatrix, NUM * sizeof(float))); + checkHipErrors(hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float))); // Memory transfer from host to device - hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice); + checkHipErrors(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice)); // Lauching kernel from host hipLaunchKernelGGL(matrixTranspose, dim3(1), dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, gpuTransposeMatrix, gpuMatrix, WIDTH); // Memory transfer from device to host - hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost); + checkHipErrors(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost)); // CPU MatrixTranspose computation matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH); @@ -110,8 +111,8 @@ int main() { } // free the resources on device side - hipFree(gpuMatrix); - hipFree(gpuTransposeMatrix); + checkHipErrors(hipFree(gpuMatrix)); + checkHipErrors(hipFree(gpuTransposeMatrix)); // free the resources on host side free(Matrix); diff --git a/samples/2_Cookbook/5_2dshfl/CMakeLists.txt b/samples/2_Cookbook/5_2dshfl/CMakeLists.txt index d0ab52859d..80fa6e08b3 100644 --- a/samples/2_Cookbook/5_2dshfl/CMakeLists.txt +++ b/samples/2_Cookbook/5_2dshfl/CMakeLists.txt @@ -39,5 +39,7 @@ set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE}) # Create the excutable add_executable(2dshfl 2dshfl.cpp) +target_include_directories(2dshfl PRIVATE ../../common) + # Link with HIP target_link_libraries(2dshfl hip::host) diff --git a/samples/2_Cookbook/5_2dshfl/Makefile b/samples/2_Cookbook/5_2dshfl/Makefile index 91afcfc53a..116d38057a 100644 --- a/samples/2_Cookbook/5_2dshfl/Makefile +++ b/samples/2_Cookbook/5_2dshfl/Makefile @@ -36,6 +36,7 @@ TARGET=hcc SOURCES = 2dshfl.cpp OBJECTS = $(SOURCES:.cpp=.o) +INCLUDES := -I../../common EXECUTABLE=./2dshfl @@ -44,7 +45,7 @@ EXECUTABLE=./2dshfl all: $(EXECUTABLE) test -CXXFLAGS =-g +CXXFLAGS =-g $(INCLUDES) CXX=$(HIPCC) diff --git a/samples/2_Cookbook/6_dynamic_shared/CMakeLists.txt b/samples/2_Cookbook/6_dynamic_shared/CMakeLists.txt index 73c4fe621b..cb90fb6b74 100644 --- a/samples/2_Cookbook/6_dynamic_shared/CMakeLists.txt +++ b/samples/2_Cookbook/6_dynamic_shared/CMakeLists.txt @@ -22,6 +22,8 @@ project(dynamic_shared) cmake_minimum_required(VERSION 3.10) +include_directories(../../common) + if (NOT DEFINED ROCM_PATH ) set ( ROCM_PATH "/opt/rocm" CACHE STRING "Default ROCM installation directory." ) endif () @@ -39,5 +41,7 @@ set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE}) # Create the excutable add_executable(dynamic_shared dynamic_shared.cpp) +target_include_directories(dynamic_shared PRIVATE ../../common) + # Link with HIP target_link_libraries(dynamic_shared hip::host) diff --git a/samples/2_Cookbook/6_dynamic_shared/Makefile b/samples/2_Cookbook/6_dynamic_shared/Makefile index d95ca76085..8db78af246 100644 --- a/samples/2_Cookbook/6_dynamic_shared/Makefile +++ b/samples/2_Cookbook/6_dynamic_shared/Makefile @@ -32,6 +32,7 @@ TARGET=hcc SOURCES = dynamic_shared.cpp OBJECTS = $(SOURCES:.cpp=.o) +INCLUDES := -I../../common EXECUTABLE=./dynamic_shared @@ -40,7 +41,7 @@ EXECUTABLE=./dynamic_shared all: $(EXECUTABLE) test -CXXFLAGS =-g +CXXFLAGS =-g $(INCLUDES) CXX=$(HIPCC) diff --git a/samples/2_Cookbook/6_dynamic_shared/dynamic_shared.cpp b/samples/2_Cookbook/6_dynamic_shared/dynamic_shared.cpp index 531d94c5be..e538d3e074 100644 --- a/samples/2_Cookbook/6_dynamic_shared/dynamic_shared.cpp +++ b/samples/2_Cookbook/6_dynamic_shared/dynamic_shared.cpp @@ -24,6 +24,7 @@ THE SOFTWARE. // hip header file #include "hip/hip_runtime.h" +#include "hip_helper.h" #define WIDTH 16 @@ -65,7 +66,7 @@ int main() { float* gpuTransposeMatrix; hipDeviceProp_t devProp; - hipGetDeviceProperties(&devProp, 0); + checkHipErrors(hipGetDeviceProperties(&devProp, 0)); std::cout << "Device name " << devProp.name << std::endl; @@ -82,11 +83,11 @@ int main() { } // allocate the memory on the device side - hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)); - hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)); + checkHipErrors(hipMalloc((void**)&gpuMatrix, NUM * sizeof(float))); + checkHipErrors(hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float))); // Memory transfer from host to device - hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice); + checkHipErrors(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice)); // Lauching kernel from host hipLaunchKernelGGL(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y), @@ -94,7 +95,7 @@ int main() { 0, gpuTransposeMatrix, gpuMatrix, WIDTH); // Memory transfer from device to host - hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost); + checkHipErrors(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost)); // CPU MatrixTranspose computation matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH); @@ -115,8 +116,8 @@ int main() { } // free the resources on device side - hipFree(gpuMatrix); - hipFree(gpuTransposeMatrix); + checkHipErrors(hipFree(gpuMatrix)); + checkHipErrors(hipFree(gpuTransposeMatrix)); // free the resources on host side free(Matrix); diff --git a/samples/2_Cookbook/7_streams/CMakeLists.txt b/samples/2_Cookbook/7_streams/CMakeLists.txt index 2d95541905..e1133da770 100644 --- a/samples/2_Cookbook/7_streams/CMakeLists.txt +++ b/samples/2_Cookbook/7_streams/CMakeLists.txt @@ -39,5 +39,7 @@ set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE}) # Create the excutable add_executable(stream stream.cpp) +target_include_directories(stream PRIVATE ../../common) + # Link with HIP target_link_libraries(stream hip::host) diff --git a/samples/2_Cookbook/7_streams/Makefile b/samples/2_Cookbook/7_streams/Makefile index 70dcd4c879..e55d9c8191 100644 --- a/samples/2_Cookbook/7_streams/Makefile +++ b/samples/2_Cookbook/7_streams/Makefile @@ -32,6 +32,7 @@ TARGET=hcc SOURCES = stream.cpp OBJECTS = $(SOURCES:.cpp=.o) +INCLUDES := -I../../common EXECUTABLE=./stream @@ -40,7 +41,7 @@ EXECUTABLE=./stream all: $(EXECUTABLE) test -CXXFLAGS =-g +CXXFLAGS =-g $(INCLUDES) CXX=$(HIPCC) diff --git a/samples/2_Cookbook/7_streams/stream.cpp b/samples/2_Cookbook/7_streams/stream.cpp index 06da516444..89bafb2a31 100644 --- a/samples/2_Cookbook/7_streams/stream.cpp +++ b/samples/2_Cookbook/7_streams/stream.cpp @@ -22,6 +22,7 @@ THE SOFTWARE. #include #include +#include "hip_helper.h" #define WIDTH 32 @@ -66,11 +67,11 @@ void MultipleStream(float** data, float* randArray, float** gpuTransposeMatrix, const int num_streams = 2; hipStream_t streams[num_streams]; - for (int i = 0; i < num_streams; i++) hipStreamCreate(&streams[i]); + for (int i = 0; i < num_streams; i++) checkHipErrors(hipStreamCreate(&streams[i])); for (int i = 0; i < num_streams; i++) { - hipMalloc((void**)&data[i], NUM * sizeof(float)); - hipMemcpyAsync(data[i], randArray, NUM * sizeof(float), hipMemcpyHostToDevice, streams[i]); + checkHipErrors(hipMalloc((void**)&data[i], NUM * sizeof(float))); + checkHipErrors(hipMemcpyAsync(data[i], randArray, NUM * sizeof(float), hipMemcpyHostToDevice, streams[i])); } hipLaunchKernelGGL(matrixTranspose_static_shared, @@ -84,12 +85,12 @@ void MultipleStream(float** data, float* randArray, float** gpuTransposeMatrix, streams[1], gpuTransposeMatrix[1], data[1], width); for (int i = 0; i < num_streams; i++) - hipMemcpyAsync(TransposeMatrix[i], gpuTransposeMatrix[i], NUM * sizeof(float), - hipMemcpyDeviceToHost, streams[i]); + checkHipErrors(hipMemcpyAsync(TransposeMatrix[i], gpuTransposeMatrix[i], NUM * sizeof(float), + hipMemcpyDeviceToHost, streams[i])); } int main() { - hipSetDevice(0); + checkHipErrors(hipSetDevice(0)); float *data[2], *TransposeMatrix[2], *gpuTransposeMatrix[2], *randArray; @@ -100,8 +101,8 @@ int main() { TransposeMatrix[0] = (float*)malloc(NUM * sizeof(float)); TransposeMatrix[1] = (float*)malloc(NUM * sizeof(float)); - hipMalloc((void**)&gpuTransposeMatrix[0], NUM * sizeof(float)); - hipMalloc((void**)&gpuTransposeMatrix[1], NUM * sizeof(float)); + checkHipErrors(hipMalloc((void**)&gpuTransposeMatrix[0], NUM * sizeof(float))); + checkHipErrors(hipMalloc((void**)&gpuTransposeMatrix[1], NUM * sizeof(float))); for (int i = 0; i < NUM; i++) { randArray[i] = (float)i * 1.0f; @@ -109,7 +110,7 @@ int main() { MultipleStream(data, randArray, gpuTransposeMatrix, TransposeMatrix, width); - hipDeviceSynchronize(); + checkHipErrors(hipDeviceSynchronize()); // verify the results int errors = 0; @@ -128,11 +129,11 @@ int main() { free(randArray); for (int i = 0; i < 2; i++) { - hipFree(data[i]); - hipFree(gpuTransposeMatrix[i]); + checkHipErrors(hipFree(data[i])); + checkHipErrors(hipFree(gpuTransposeMatrix[i])); free(TransposeMatrix[i]); } - hipDeviceReset(); + checkHipErrors(hipDeviceReset()); return 0; } diff --git a/samples/2_Cookbook/9_unroll/CMakeLists.txt b/samples/2_Cookbook/9_unroll/CMakeLists.txt index 258138f2b9..070f66fe23 100644 --- a/samples/2_Cookbook/9_unroll/CMakeLists.txt +++ b/samples/2_Cookbook/9_unroll/CMakeLists.txt @@ -39,5 +39,7 @@ set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE}) # Create the excutable add_executable(unroll unroll.cpp) +target_include_directories(unroll PRIVATE ../../common) + # Link with HIP target_link_libraries(unroll hip::host) diff --git a/samples/2_Cookbook/9_unroll/Makefile b/samples/2_Cookbook/9_unroll/Makefile index 657f879ac5..49bf51ece1 100644 --- a/samples/2_Cookbook/9_unroll/Makefile +++ b/samples/2_Cookbook/9_unroll/Makefile @@ -36,6 +36,7 @@ TARGET=hcc SOURCES = unroll.cpp OBJECTS = $(SOURCES:.cpp=.o) +INCLUDES := -I../../common EXECUTABLE=./unroll @@ -44,7 +45,7 @@ EXECUTABLE=./unroll all: $(EXECUTABLE) test -CXXFLAGS =-g +CXXFLAGS =-g $(INCLUDES) CXX=$(HIPCC) diff --git a/samples/2_Cookbook/9_unroll/unroll.cpp b/samples/2_Cookbook/9_unroll/unroll.cpp index 18f910a5dd..230ff60112 100644 --- a/samples/2_Cookbook/9_unroll/unroll.cpp +++ b/samples/2_Cookbook/9_unroll/unroll.cpp @@ -24,6 +24,7 @@ THE SOFTWARE. // hip header file #include "hip/hip_runtime.h" +#include "hip_helper.h" #define LENGTH 4 @@ -59,7 +60,7 @@ int main() { int* gpuSumMatrix; hipDeviceProp_t devProp; - hipGetDeviceProperties(&devProp, 0); + checkHipErrors(hipGetDeviceProperties(&devProp, 0)); std::cout << "Device name " << devProp.name << std::endl; @@ -76,19 +77,19 @@ int main() { } // Allocated Device Memory - hipMalloc((void**)&gpuMatrix, SIZE * sizeof(int)); - hipMalloc((void**)&gpuSumMatrix, LENGTH * sizeof(int)); + checkHipErrors(hipMalloc((void**)&gpuMatrix, SIZE * sizeof(int))); + checkHipErrors(hipMalloc((void**)&gpuSumMatrix, LENGTH * sizeof(int))); // Memory Copy to Device - hipMemcpy(gpuMatrix, Matrix, SIZE * sizeof(int), hipMemcpyHostToDevice); - hipMemcpy(gpuSumMatrix, cpuSumMatrix, LENGTH * sizeof(float), hipMemcpyHostToDevice); + checkHipErrors(hipMemcpy(gpuMatrix, Matrix, SIZE * sizeof(int), hipMemcpyHostToDevice)); + checkHipErrors(hipMemcpy(gpuSumMatrix, cpuSumMatrix, LENGTH * sizeof(float), hipMemcpyHostToDevice)); // Launch device kernels hipLaunchKernelGGL(gpuMatrixRowSum, dim3(BLOCKS_PER_GRID), dim3(THREADS_PER_BLOCK), 0, 0, gpuMatrix, gpuSumMatrix, LENGTH); // Memory copy back to device - hipMemcpy(sumMatrix, gpuSumMatrix, LENGTH * sizeof(int), hipMemcpyDeviceToHost); + checkHipErrors(hipMemcpy(sumMatrix, gpuSumMatrix, LENGTH * sizeof(int), hipMemcpyDeviceToHost)); // Cpu implementation matrixRowSum(Matrix, cpuSumMatrix, LENGTH); @@ -110,8 +111,8 @@ int main() { } // GPU Free - hipFree(gpuMatrix); - hipFree(gpuSumMatrix); + checkHipErrors(hipFree(gpuMatrix)); + checkHipErrors(hipFree(gpuSumMatrix)); // CPU Free free(Matrix); diff --git a/samples/common/hip_helper.h b/samples/common/hip_helper.h new file mode 100644 index 0000000000..4b25346c92 --- /dev/null +++ b/samples/common/hip_helper.h @@ -0,0 +1,38 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "hip/hip_runtime.h" + +#ifndef checkHipErrors +#define checkHipErrors(err) __checkHipErrors(err, __FILE__, __LINE__) + +inline void __checkHipErrors(hipError_t err, const char *file, const int line) { + if (HIP_SUCCESS != err) { + const char *errorStr = hipGetErrorString(err); + fprintf(stderr, + "checkHipErrors() HIP API error = %04d \"%s\" from file <%s>, " + "line %i.\n", + err, errorStr, file, line); + exit(EXIT_FAILURE); + } +} +#endif