SWDEV-402381 - Add checkHipErrors for HIP API calls in samples (#375)

Change-Id: I335d7e780362fc59fd2d90939b4c8b8a7231ffc7
This commit is contained in:
ROCm CI Service Account
2023-07-20 10:22:17 +05:30
committed by GitHub
parent b8fb6f88b9
commit 7cc53f992f
71 changed files with 460 additions and 448 deletions
@@ -48,5 +48,7 @@ set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE})
# Create the executable
add_executable(bit_extract bit_extract.cpp)
target_include_directories(bit_extract PRIVATE ../../common)
# Link with HIP
target_link_libraries(bit_extract hip::host)
+2 -1
View File
@@ -29,6 +29,7 @@ ifeq (,$(HIP_PATH))
endif
HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --platform)
HIPCC=$(HIP_PATH)/bin/hipcc
INCLUDES := -I../../common
# Show how to use PLATFORM to specify different options for each compiler:
ifeq (${HIP_PLATFORM}, nvcc)
@@ -38,7 +39,7 @@ endif
EXE=bit_extract
$(EXE): bit_extract.cpp
$(HIPCC) $(HIPCC_FLAGS) $< -o $@
$(HIPCC) $(HIPCC_FLAGS) $(INCLUDES) $< -o $@
all: $(EXE)
+10 -19
View File
@@ -23,16 +23,7 @@ THE SOFTWARE.
#include <stdio.h>
#include <iostream>
#include "hip/hip_runtime.h"
#define CHECK(cmd) \
{ \
hipError_t error = cmd; \
if (error != hipSuccess) { \
fprintf(stderr, "error: '%s'(%d) at %s:%d\n", hipGetErrorString(error), error, \
__FILE__, __LINE__); \
exit(EXIT_FAILURE); \
} \
}
#include "hip_helper.h"
__global__ void bit_extract_kernel(uint32_t* C_d, const uint32_t* A_d, size_t N) {
size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
@@ -69,28 +60,28 @@ int main(int argc, char* argv[]) {
#endif
int deviceId;
CHECK(hipGetDevice(&deviceId));
checkHipErrors(hipGetDevice(&deviceId));
hipDeviceProp_t props;
CHECK(hipGetDeviceProperties(&props, deviceId));
checkHipErrors(hipGetDeviceProperties(&props, deviceId));
printf("info: running on device #%d %s\n", deviceId, props.name);
printf("info: allocate host mem (%6.2f MB)\n", 2 * Nbytes / 1024.0 / 1024.0);
A_h = (uint32_t*)malloc(Nbytes);
CHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess);
checkHipErrors(A_h == 0 ? hipErrorOutOfMemory : hipSuccess);
C_h = (uint32_t*)malloc(Nbytes);
CHECK(C_h == 0 ? hipErrorOutOfMemory : hipSuccess);
checkHipErrors(C_h == 0 ? hipErrorOutOfMemory : hipSuccess);
for (size_t i = 0; i < N; i++) {
A_h[i] = i;
}
printf("info: allocate device mem (%6.2f MB)\n", 2 * Nbytes / 1024.0 / 1024.0);
CHECK(hipMalloc(&A_d, Nbytes));
CHECK(hipMalloc(&C_d, Nbytes));
checkHipErrors(hipMalloc(&A_d, Nbytes));
checkHipErrors(hipMalloc(&C_d, Nbytes));
printf("info: copy Host2Device\n");
CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
checkHipErrors(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
printf("info: launch 'bit_extract_kernel' \n");
const unsigned blocks = 512;
@@ -98,7 +89,7 @@ int main(int argc, char* argv[]) {
hipLaunchKernelGGL(bit_extract_kernel, dim3(blocks), dim3(threadsPerBlock), 0, 0, C_d, A_d, N);
printf("info: copy Device2Host\n");
CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
checkHipErrors(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
printf("info: check result\n");
for (size_t i = 0; i < N; i++) {
@@ -106,7 +97,7 @@ int main(int argc, char* argv[]) {
if (C_h[i] != Agold) {
fprintf(stderr, "mismatch detected.\n");
printf("%zu: %08x =? %08x (Ain=%08x)\n", i, C_h[i], Agold, A_h[i]);
CHECK(hipErrorUnknown);
checkHipErrors(hipErrorUnknown);
}
}
printf("PASSED!\n");
@@ -22,6 +22,8 @@ project(module_api)
cmake_minimum_required(VERSION 3.10)
include_directories(../../common)
if (NOT DEFINED ROCM_PATH )
set ( ROCM_PATH "/opt/rocm" CACHE STRING "Default ROCM installation directory." )
endif ()
+5 -4
View File
@@ -27,20 +27,21 @@ ifeq (,$(HIP_PATH))
endif
HIPCC=$(HIP_PATH)/bin/hipcc
HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --compiler)
INCLUDES := -I../../common
all: vcpy_kernel.code runKernel.hip.out launchKernelHcc.hip.out defaultDriver.hip.out
runKernel.hip.out: runKernel.cpp
$(HIPCC) $(HIPCC_FLAGS) $< -o $@
$(HIPCC) $(HIPCC_FLAGS) $(INCLUDES) $< -o $@
launchKernelHcc.hip.out: launchKernelHcc.cpp
$(HIPCC) $(HIPCC_FLAGS) $< -o $@
$(HIPCC) $(HIPCC_FLAGS) $(INCLUDES) $< -o $@
defaultDriver.hip.out: defaultDriver.cpp
$(HIPCC) $(HIPCC_FLAGS) $< -o $@
$(HIPCC) $(HIPCC_FLAGS) $(INCLUDES) $< -o $@
vcpy_kernel.code: vcpy_kernel.cpp
$(HIPCC) --genco $(GENCO_FLAGS) $^ -o $@
$(HIPCC) --genco $(GENCO_FLAGS) $(INCLUDES) $^ -o $@
clean:
rm -f *.code *.out
+14 -13
View File
@@ -24,6 +24,7 @@ THE SOFTWARE.
#include <iostream>
#include <fstream>
#include <vector>
#include "hip_helper.h"
#define LEN 64
#define SIZE LEN << 2
@@ -45,25 +46,25 @@ int main() {
hipInit(0);
hipDevice_t device;
hipCtx_t context;
hipDeviceGet(&device, 0);
hipCtxCreate(&context, 0, device);
checkHipErrors(hipDeviceGet(&device, 0));
checkHipErrors(hipCtxCreate(&context, 0, device));
hipMalloc((void**)&Ad, SIZE);
hipMalloc((void**)&Bd, SIZE);
checkHipErrors(hipMalloc((void**)&Ad, SIZE));
checkHipErrors(hipMalloc((void**)&Bd, SIZE));
hipMemcpyHtoD(Ad, A, SIZE);
hipMemcpyHtoD(Bd, B, SIZE);
checkHipErrors(hipMemcpyHtoD(Ad, A, SIZE));
checkHipErrors(hipMemcpyHtoD(Bd, B, SIZE));
hipModule_t Module;
hipFunction_t Function;
hipModuleLoad(&Module, fileName);
hipModuleGetFunction(&Function, Module, kernel_name);
checkHipErrors(hipModuleLoad(&Module, fileName));
checkHipErrors(hipModuleGetFunction(&Function, Module, kernel_name));
void* args[2] = {&Ad, &Bd};
hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, args, nullptr);
checkHipErrors(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, args, nullptr));
hipMemcpyDtoH(B, Bd, SIZE);
checkHipErrors(hipMemcpyDtoH(B, Bd, SIZE));
int mismatchCount = 0;
for (uint32_t i = 0; i < LEN; i++) {
if (A[i] != B[i]) {
@@ -78,10 +79,10 @@ int main() {
std::cout << "FAILED!\n";
};
hipFree(Ad);
hipFree(Bd);
checkHipErrors(hipFree(Ad));
checkHipErrors(hipFree(Bd));
delete[] A;
delete[] B;
hipCtxDestroy(context);
checkHipErrors(hipCtxDestroy(context));
return 0;
}
+14 -19
View File
@@ -25,6 +25,7 @@ THE SOFTWARE.
#include <iostream>
#include <fstream>
#include <vector>
#include "hip_helper.h"
#ifdef __HIP_PLATFORM_AMD__
#include <hip/hip_ext.h>
@@ -36,12 +37,6 @@ THE SOFTWARE.
#define fileName "vcpy_kernel.code"
#define kernel_name "hello_world"
#define HIP_CHECK(status) \
if (status != hipSuccess) { \
std::cout << "Got Status: " << status << " at Line: " << __LINE__ << std::endl; \
exit(0); \
}
int main() {
float *A, *B;
hipDeviceptr_t Ad, Bd;
@@ -56,18 +51,18 @@ int main() {
hipInit(0);
hipDevice_t device;
hipCtx_t context;
hipDeviceGet(&device, 0);
hipCtxCreate(&context, 0, device);
checkHipErrors(hipDeviceGet(&device, 0));
checkHipErrors(hipCtxCreate(&context, 0, device));
hipMalloc((void**)&Ad, SIZE);
hipMalloc((void**)&Bd, SIZE);
checkHipErrors(hipMalloc((void**)&Ad, SIZE));
checkHipErrors(hipMalloc((void**)&Bd, SIZE));
hipMemcpyHtoD(Ad, A, SIZE);
hipMemcpyHtoD(Bd, B, SIZE);
checkHipErrors(hipMemcpyHtoD(Ad, A, SIZE));
checkHipErrors(hipMemcpyHtoD(Bd, B, SIZE));
hipModule_t Module;
hipFunction_t Function;
HIP_CHECK(hipModuleLoad(&Module, fileName));
HIP_CHECK(hipModuleGetFunction(&Function, Module, kernel_name));
checkHipErrors(hipModuleLoad(&Module, fileName));
checkHipErrors(hipModuleGetFunction(&Function, Module, kernel_name));
struct {
void* _Ad;
@@ -83,10 +78,10 @@ int main() {
void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
HIP_LAUNCH_PARAM_END};
HIP_CHECK(
checkHipErrors(
hipExtModuleLaunchKernel(Function, LEN, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config, 0));
hipMemcpyDtoH(B, Bd, SIZE);
checkHipErrors(hipMemcpyDtoH(B, Bd, SIZE));
int mismatchCount = 0;
for (uint32_t i = 0; i < LEN; i++) {
@@ -102,10 +97,10 @@ int main() {
std::cout << "FAILED!\n";
};
hipFree(Ad);
hipFree(Bd);
checkHipErrors(hipFree(Ad));
checkHipErrors(hipFree(Bd));
delete[] A;
delete[] B;
hipCtxDestroy(context);
checkHipErrors(hipCtxDestroy(context));
return 0;
}
+13 -18
View File
@@ -26,6 +26,7 @@ THE SOFTWARE.
#include <fstream>
#include <vector>
#include <hip/hip_hcc.h>
#include "hip_helper.h"
#define LEN 64
#define SIZE LEN << 2
@@ -33,12 +34,6 @@ THE SOFTWARE.
#define fileName "vcpy_kernel.code"
#define kernel_name "hello_world"
#define HIP_CHECK(status) \
if (status != hipSuccess) { \
std::cout << "Got Status: " << status << " at Line: " << __LINE__ << std::endl; \
exit(0); \
}
int main() {
float *A, *B;
hipDeviceptr_t Ad, Bd;
@@ -53,18 +48,18 @@ int main() {
hipInit(0);
hipDevice_t device;
hipCtx_t context;
hipDeviceGet(&device, 0);
hipCtxCreate(&context, 0, device);
checkHipErrors(hipDeviceGet(&device, 0));
checkHipErrors(hipCtxCreate(&context, 0, device));
hipMalloc((void**)&Ad, SIZE);
hipMalloc((void**)&Bd, SIZE);
checkHipErrors(hipMalloc((void**)&Ad, SIZE));
checkHipErrors(hipMalloc((void**)&Bd, SIZE));
hipMemcpyHtoD(Ad, A, SIZE);
hipMemcpyHtoD(Bd, B, SIZE);
checkHipErrors(hipMemcpyHtoD(Ad, A, SIZE));
checkHipErrors(hipMemcpyHtoD(Bd, B, SIZE));
hipModule_t Module;
hipFunction_t Function;
HIP_CHECK(hipModuleLoad(&Module, fileName));
HIP_CHECK(hipModuleGetFunction(&Function, Module, kernel_name));
checkHipErrors(hipModuleLoad(&Module, fileName));
checkHipErrors(hipModuleGetFunction(&Function, Module, kernel_name));
struct {
void* _Ad;
@@ -79,9 +74,9 @@ int main() {
void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
HIP_LAUNCH_PARAM_END};
HIP_CHECK(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config));
checkHipErrors(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config));
hipMemcpyDtoH(B, Bd, SIZE);
checkHipErrors(hipMemcpyDtoH(B, Bd, SIZE));
int mismatchCount = 0;
for (uint32_t i = 0; i < LEN; i++) {
@@ -97,10 +92,10 @@ int main() {
std::cout << "FAILED!\n";
};
hipFree(Ad);
checkHipErrors(hipFree(Ad));
hipFree(Bd);
delete[] A;
delete[] B;
hipCtxDestroy(context);
checkHipErrors(hipCtxDestroy(context));
return 0;
}
@@ -50,5 +50,7 @@ add_custom_target(
add_dependencies(runKernel.hip.out codeobj)
target_include_directories(runKernel.hip.out PRIVATE ../../common)
# Link with HIP
target_link_libraries(runKernel.hip.out hip::host)
+2 -1
View File
@@ -27,11 +27,12 @@ ifeq (,$(HIP_PATH))
endif
HIPCC=$(HIP_PATH)/bin/hipcc
HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --compiler)
INCLUDES := -I../../common
all: vcpy_kernel.code runKernel.hip.out
runKernel.hip.out: runKernel.cpp
$(HIPCC) $(HIPCC_FLAGS) $< -o $@
$(HIPCC) $(HIPCC_FLAGS) $(INCLUDES) $< -o $@
vcpy_kernel.code: vcpy_kernel.cpp
$(HIPCC) --genco $(GENCO_FLAGS) $^ -o $@
+12 -12
View File
@@ -31,7 +31,7 @@ THE SOFTWARE.
#define SIZE LEN * sizeof(float)
#define fileName "vcpy_kernel.code"
#define HIP_CHECK(cmd) \
#define checkHipErrors(cmd) \
{ \
hipError_t status = cmd; \
if (status != hipSuccess) { \
@@ -64,23 +64,23 @@ int main() {
hipMemcpyHtoD(hipDeviceptr_t(Ad), A, SIZE);
hipMemcpyHtoD((hipDeviceptr_t)(Bd), B, SIZE);
hipModule_t Module;
HIP_CHECK(hipModuleLoad(&Module, fileName));
checkHipErrors(hipModuleLoad(&Module, fileName));
float myDeviceGlobal_h = 42.0;
float* deviceGlobal;
size_t deviceGlobalSize;
HIP_CHECK(hipModuleGetGlobal((void**)&deviceGlobal, &deviceGlobalSize, Module, "myDeviceGlobal"));
HIP_CHECK(hipMemcpyHtoD(hipDeviceptr_t(deviceGlobal), &myDeviceGlobal_h, deviceGlobalSize));
checkHipErrors(hipModuleGetGlobal((void**)&deviceGlobal, &deviceGlobalSize, Module, "myDeviceGlobal"));
checkHipErrors(hipMemcpyHtoD(hipDeviceptr_t(deviceGlobal), &myDeviceGlobal_h, deviceGlobalSize));
#define ARRAY_SIZE 16
float myDeviceGlobalArray_h[ARRAY_SIZE];
float *myDeviceGlobalArray;
size_t myDeviceGlobalArraySize;
HIP_CHECK(hipModuleGetGlobal((void**)&myDeviceGlobalArray, &myDeviceGlobalArraySize, Module, "myDeviceGlobalArray"));
checkHipErrors(hipModuleGetGlobal((void**)&myDeviceGlobalArray, &myDeviceGlobalArraySize, Module, "myDeviceGlobalArray"));
for (int i = 0; i < ARRAY_SIZE; i++) {
myDeviceGlobalArray_h[i] = i * 1000.0f;
HIP_CHECK(hipMemcpyHtoD(hipDeviceptr_t(myDeviceGlobalArray), &myDeviceGlobalArray_h, myDeviceGlobalArraySize));
checkHipErrors(hipMemcpyHtoD(hipDeviceptr_t(myDeviceGlobalArray), &myDeviceGlobalArray_h, myDeviceGlobalArraySize));
}
struct {
@@ -98,8 +98,8 @@ int main() {
{
hipFunction_t Function;
HIP_CHECK(hipModuleGetFunction(&Function, Module, "hello_world"));
HIP_CHECK(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config));
checkHipErrors(hipModuleGetFunction(&Function, Module, "hello_world"));
checkHipErrors(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config));
hipMemcpyDtoH(B, Bd, SIZE);
@@ -123,13 +123,13 @@ int main() {
{
hipFunction_t Function;
HIP_CHECK(hipModuleGetFunction(&Function, Module, "test_globals"));
checkHipErrors(hipModuleGetFunction(&Function, Module, "test_globals"));
int val =-1;
HIP_CHECK(hipFuncGetAttribute(&val, HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES,Function));
checkHipErrors(hipFuncGetAttribute(&val, HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES,Function));
printf("Shared Size Bytes = %d\n",val);
HIP_CHECK(hipFuncGetAttribute(&val, HIP_FUNC_ATTRIBUTE_NUM_REGS, Function));
checkHipErrors(hipFuncGetAttribute(&val, HIP_FUNC_ATTRIBUTE_NUM_REGS, Function));
printf("Num Regs = %d\n",val);
HIP_CHECK(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config));
checkHipErrors(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config));
hipMemcpyDtoH(B, Bd, SIZE);
@@ -22,6 +22,8 @@ project(hipDispatchLatency)
cmake_minimum_required(VERSION 3.10)
include_directories(../../common)
if (NOT DEFINED ROCM_PATH )
set ( ROCM_PATH "/opt/rocm" CACHE STRING "Default ROCM installation directory." )
endif ()
+2 -1
View File
@@ -26,8 +26,9 @@ ifeq (,$(HIP_PATH))
HIP_PATH=../../..
endif
HIPCC=$(HIP_PATH)/bin/hipcc -std=c++11
INCLUDES := -I../../common
CXXFLAGS = -O3
CXXFLAGS = -O3 $(INCLUDES)
all: test_kernel.code hipDispatchLatency.out hipDispatchEnqueueRateMT.out
@@ -22,6 +22,7 @@ THE SOFTWARE.
#ifdef __HIP_PLATFORM_AMD__
#include "hip/hip_ext.h"
#endif
#include "hip_helper.h"
#include <iostream>
#include <fstream>
#include <chrono>
@@ -41,16 +42,6 @@ THE SOFTWARE.
#define failed(...) \
abort();
#define HIPCHECK(error) \
{ \
hipError_t localError = error; \
if ((localError != hipSuccess) && (localError != hipErrorPeerAccessAlreadyEnabled)) { \
printf("error: '%s'(%d) from %s at %s:%d\n", hipGetErrorString(localError), \
localError, #error, __FILE__, __LINE__); \
failed("API returned error code."); \
} \
}
__global__ void EmptyKernel() {}
@@ -87,12 +78,12 @@ void hipModuleLaunchKernel_enqueue_rate(const std::vector<char>& buffer, std::at
{
//resources necessary for this thread
hipStream_t stream;
HIPCHECK(hipStreamCreate(&stream));
checkHipErrors(hipStreamCreate(&stream));
hipModule_t module;
hipFunction_t function;
HIPCHECK(hipModuleLoadData(&module, &buffer[0]));
HIPCHECK(hipModuleGetFunction(&function, module, "test"));
checkHipErrors(hipModuleLoadData(&module, &buffer[0]));
checkHipErrors(hipModuleGetFunction(&function, module, "test"));
void* kernel_params = nullptr;
std::array<float, TOTAL_RUN_COUNT> results;
@@ -103,13 +94,13 @@ void hipModuleLaunchKernel_enqueue_rate(const std::vector<char>& buffer, std::at
for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) {
auto start = std::chrono::high_resolution_clock::now();
HIPCHECK(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, stream, &kernel_params, nullptr));
checkHipErrors(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, stream, &kernel_params, nullptr));
auto stop = std::chrono::high_resolution_clock::now();
results[i] = std::chrono::duration<double, std::milli>(stop - start).count();
}
HIPCHECK(hipModuleUnload(module));
checkHipErrors(hipModuleUnload(module));
print_timing("Thread ID : " + std::to_string(tid) + " , " + "hipModuleLaunchKernel enqueue rate", results);
HIPCHECK(hipStreamDestroy(stream));
checkHipErrors(hipStreamDestroy(stream));
}
// Measure time taken to enqueue a kernel on the GPU using hipLaunchKernelGGL
@@ -117,7 +108,7 @@ void hipLaunchKernelGGL_enqueue_rate(const std::vector<char>& buffer, std::atomi
{
//resources necessary for this thread
hipStream_t stream;
HIPCHECK(hipStreamCreate(&stream));
checkHipErrors(hipStreamCreate(&stream));
std::array<float, TOTAL_RUN_COUNT> results;
//synchronize all threads, before running
@@ -131,7 +122,7 @@ void hipLaunchKernelGGL_enqueue_rate(const std::vector<char>& buffer, std::atomi
results[i] = std::chrono::duration<double, std::milli>(stop - start).count();
}
print_timing("Thread ID : " + std::to_string(tid) + " , " + "hipLaunchKernelGGL enqueue rate", results);
HIPCHECK(hipStreamDestroy(stream));
checkHipErrors(hipStreamDestroy(stream));
}
// Simple thread pool
@@ -21,6 +21,7 @@ THE SOFTWARE.
#ifdef __HIP_PLATFORM_AMD__
#include "hip/hip_ext.h"
#endif
#include "hip_helper.h"
#include <iostream>
#include <chrono>
#include <algorithm>
@@ -66,19 +67,19 @@ void print_timing(std::string test, const std::array<float, TOTAL_RUN_COUNT> &re
int main() {
hipStream_t stream0 = 0;
hipDevice_t device;
hipDeviceGet(&device, 0);
checkHipErrors(hipDeviceGet(&device, 0));
hipCtx_t context;
hipCtxCreate(&context, 0, device);
checkHipErrors(hipCtxCreate(&context, 0, device));
hipModule_t module;
hipFunction_t function;
hipModuleLoad(&module, FILE_NAME);
hipModuleGetFunction(&function, module, KERNEL_NAME);
checkHipErrors(hipModuleLoad(&module, FILE_NAME));
checkHipErrors(hipModuleGetFunction(&function, module, KERNEL_NAME));
void* params = nullptr;
std::array<float, TOTAL_RUN_COUNT> results;
hipEvent_t start, stop;
hipEventCreate(&start);
hipEventCreate(&stop);
checkHipErrors(hipEventCreate(&start));
checkHipErrors(hipEventCreate(&stop));
/************************************************************************************/
/* HIP kernel launch enqueue rate: */
@@ -88,7 +89,7 @@ int main() {
// Timing hipModuleLaunchKernel
for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) {
auto start = std::chrono::high_resolution_clock::now();
hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0, &params, nullptr);
checkHipErrors(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0, &params, nullptr));
auto stop = std::chrono::high_resolution_clock::now();
results[i] = std::chrono::duration<float, std::milli>(stop - start).count();
}
@@ -110,11 +111,11 @@ int main() {
//Timing around the dispatch
for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) {
hipEventRecord(start, 0);
checkHipErrors(hipEventRecord(start, 0));
hipLaunchKernelGGL((EmptyKernel), dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream0);
hipEventRecord(stop, 0);
hipEventSynchronize(stop);
hipEventElapsedTime(&results[i], start, stop);
checkHipErrors(hipEventRecord(stop, 0));
checkHipErrors(hipEventSynchronize(stop));
checkHipErrors(hipEventElapsedTime(&results[i], start, stop));
}
print_timing("Timing around single dispatch latency", results);
@@ -124,18 +125,18 @@ int main() {
/*********************************************************************************/
for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) {
hipEventRecord(start, 0);
checkHipErrors(hipEventRecord(start, 0));
for (int j = 0; j < BATCH_SIZE; j++) {
hipLaunchKernelGGL((EmptyKernel), dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream0);
}
hipEventRecord(stop, 0);
hipEventSynchronize(stop);
hipEventElapsedTime(&results[i], start, stop);
checkHipErrors(hipEventRecord(stop, 0));
checkHipErrors(hipEventSynchronize(stop));
checkHipErrors(hipEventElapsedTime(&results[i], start, stop));
}
print_timing("Batch dispatch latency", results, BATCH_SIZE);
hipEventDestroy(start);
hipEventDestroy(stop);
hipCtxDestroy(context);
checkHipErrors(hipEventDestroy(start));
checkHipErrors(hipEventDestroy(stop));
checkHipErrors(hipCtxDestroy(context));
}
+2
View File
@@ -57,6 +57,8 @@ add_executable(hipInfo hipInfo.cpp)
# Link with HIP
target_link_libraries(hipInfo hip::host)
target_include_directories(hipInfo PRIVATE ../../common)
# Used only when make install is called
# when hipInfo is built as part of compute project
# hipInfo.exe will be installed to install/hip/bin path
+2 -1
View File
@@ -26,13 +26,14 @@ ifeq (,$(HIP_PATH))
HIP_PATH=../../..
endif
HIPCC=$(HIP_PATH)/bin/hipcc
INCLUDES := -I../../common
EXE=hipInfo
all: install
$(EXE): hipInfo.cpp
$(HIPCC) hipInfo.cpp -o $@
$(HIPCC) hipInfo.cpp $(INCLUDES) -o $@
install: $(EXE)
cp $(EXE) $(HIP_PATH)/bin
+8 -21
View File
@@ -23,6 +23,7 @@ THE SOFTWARE.
#include <iostream>
#include <iomanip>
#include "hip/hip_runtime.h"
#include "hip_helper.h"
#define KNRM "\x1B[0m"
#define KRED "\x1B[31m"
@@ -33,20 +34,6 @@ THE SOFTWARE.
#define KCYN "\x1B[36m"
#define KWHT "\x1B[37m"
#define failed(...) \
printf("%serror: ", KRED); \
printf(__VA_ARGS__); \
printf("\n"); \
printf("error: TEST FAILED\n%s", KNRM); \
exit(EXIT_FAILURE);
#define HIPCHECK(error) \
if (error != hipSuccess) { \
printf("%serror: '%s'(%d) at %s:%d%s\n", KRED, hipGetErrorString(error), error, __FILE__, \
__LINE__, KNRM); \
failed("API returned error code."); \
}
void printCompilerInfo() {
#ifdef __NVCC__
printf("compiler: nvcc\n");
@@ -76,7 +63,7 @@ void printDeviceProp(int deviceId) {
cout << setw(w1) << "device#" << deviceId << endl;
hipDeviceProp_t props = {0};
HIPCHECK(hipGetDeviceProperties(&props, deviceId));
checkHipErrors(hipGetDeviceProperties(&props, deviceId));
cout << setw(w1) << "Name: " << props.name << endl;
cout << setw(w1) << "pciBusID: " << props.pciBusID << endl;
@@ -149,11 +136,11 @@ void printDeviceProp(int deviceId) {
cout << setw(w1) << "gcnArchName: " << props.gcnArchName << endl;
#endif
int deviceCnt;
hipGetDeviceCount(&deviceCnt);
checkHipErrors(hipGetDeviceCount(&deviceCnt));
cout << setw(w1) << "peers: ";
for (int i = 0; i < deviceCnt; i++) {
int isPeer;
hipDeviceCanAccessPeer(&isPeer, i, deviceId);
checkHipErrors(hipDeviceCanAccessPeer(&isPeer, i, deviceId));
if (isPeer) {
cout << "device#" << i << " ";
}
@@ -162,7 +149,7 @@ void printDeviceProp(int deviceId) {
cout << setw(w1) << "non-peers: ";
for (int i = 0; i < deviceCnt; i++) {
int isPeer;
hipDeviceCanAccessPeer(&isPeer, i, deviceId);
checkHipErrors(hipDeviceCanAccessPeer(&isPeer, i, deviceId));
if (!isPeer) {
cout << "device#" << i << " ";
}
@@ -185,7 +172,7 @@ void printDeviceProp(int deviceId) {
size_t free, total;
hipMemGetInfo(&free, &total);
checkHipErrors(hipMemGetInfo(&free, &total));
cout << fixed << setprecision(2);
cout << setw(w1) << "memInfo.total: " << bytesToGB(total) << " GB" << endl;
@@ -202,10 +189,10 @@ int main(int argc, char* argv[]) {
int deviceCnt;
HIPCHECK(hipGetDeviceCount(&deviceCnt));
checkHipErrors(hipGetDeviceCount(&deviceCnt));
for (int i = 0; i < deviceCnt; i++) {
hipSetDevice(i);
checkHipErrors(hipSetDevice(i));
printDeviceProp(i);
}
@@ -40,5 +40,7 @@ set(CMAKE_BUILD_TYPE Release)
# Create the executable
add_executable(MatrixTranspose MatrixTranspose.cpp)
target_include_directories(MatrixTranspose PRIVATE ../../common)
# Link with HIP
target_link_libraries(MatrixTranspose hip::host)
@@ -30,6 +30,7 @@ HIPCC=$(HIP_PATH)/bin/hipcc
TARGET=hcc
INCLUDES := -I../../common
SOURCES = MatrixTranspose.cpp
OBJECTS = $(SOURCES:.cpp=.o)
@@ -40,7 +41,7 @@ EXECUTABLE=./MatrixTranspose
all: $(EXECUTABLE) test
CXXFLAGS =-g
CXXFLAGS =-g $(INCLUDES)
CXX=$(HIPCC)
@@ -24,6 +24,7 @@ THE SOFTWARE.
// hip header file
#include "hip/hip_runtime.h"
#include "hip_helper.h"
#define WIDTH 1024
@@ -61,7 +62,7 @@ int main() {
float* gpuTransposeMatrix;
hipDeviceProp_t devProp;
hipGetDeviceProperties(&devProp, 0);
checkHipErrors(hipGetDeviceProperties(&devProp, 0));
std::cout << "Device name " << devProp.name << std::endl;
@@ -78,11 +79,11 @@ int main() {
}
// allocate the memory on the device side
hipMalloc((void**)&gpuMatrix, NUM * sizeof(float));
hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float));
checkHipErrors(hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)));
checkHipErrors(hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)));
// Memory transfer from host to device
hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice);
checkHipErrors(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice));
// Launching kernel from host
hipLaunchKernelGGL(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y),
@@ -90,7 +91,7 @@ int main() {
gpuMatrix, WIDTH);
// Memory transfer from device to host
hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost);
checkHipErrors(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost));
// CPU MatrixTranspose computation
matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
@@ -110,8 +111,8 @@ int main() {
}
// free the resources on device side
hipFree(gpuMatrix);
hipFree(gpuTransposeMatrix);
checkHipErrors(hipFree(gpuMatrix));
checkHipErrors(hipFree(gpuTransposeMatrix));
// free the resources on host side
free(Matrix);
@@ -40,5 +40,7 @@ set(CMAKE_BUILD_TYPE Release)
# Create the executable
add_executable(inline_asm inline_asm.cpp)
target_include_directories(inline_asm PRIVATE ../../common)
# Link with HIP
target_link_libraries(inline_asm hip::host)
+2 -2
View File
@@ -32,7 +32,7 @@ TARGET=hcc
SOURCES = inline_asm.cpp
OBJECTS = $(SOURCES:.cpp=.o)
INCLUDES := -I../../common
EXECUTABLE=./inline_asm
.PHONY: test
@@ -40,7 +40,7 @@ EXECUTABLE=./inline_asm
all: $(EXECUTABLE) test
CXXFLAGS =-g
CXXFLAGS =-g $(INCLUDES)
CXX=$(HIPCC)
+22 -21
View File
@@ -24,6 +24,7 @@ THE SOFTWARE.
// hip header file
#include "hip/hip_runtime.h"
#include "hip_helper.h"
#define WIDTH 1024
@@ -59,13 +60,13 @@ int main() {
float* gpuTransposeMatrix;
hipDeviceProp_t devProp;
hipGetDeviceProperties(&devProp, 0);
checkHipErrors(hipGetDeviceProperties(&devProp, 0));
std::cout << "Device name " << devProp.name << std::endl;
hipEvent_t start, stop;
hipEventCreate(&start);
hipEventCreate(&stop);
checkHipErrors(hipEventCreate(&start));
checkHipErrors(hipEventCreate(&stop));
float eventMs = 1.0f;
int i;
@@ -81,25 +82,25 @@ int main() {
}
// allocate the memory on the device side
hipMalloc((void**)&gpuMatrix, NUM * sizeof(float));
hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float));
checkHipErrors(hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)));
checkHipErrors(hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)));
// Record the start event
hipEventRecord(start, NULL);
checkHipErrors(hipEventRecord(start, NULL));
// Memory transfer from host to device
hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice);
checkHipErrors(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice));
// Record the stop event
hipEventRecord(stop, NULL);
hipEventSynchronize(stop);
checkHipErrors(hipEventRecord(stop, NULL));
checkHipErrors(hipEventSynchronize(stop));
hipEventElapsedTime(&eventMs, start, stop);
checkHipErrors(hipEventElapsedTime(&eventMs, start, stop));
printf("hipMemcpyHostToDevice time taken = %6.3fms\n", eventMs);
// Record the start event
hipEventRecord(start, NULL);
checkHipErrors(hipEventRecord(start, NULL));
// Launching kernel from host
hipLaunchKernelGGL(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y),
@@ -107,24 +108,24 @@ int main() {
gpuMatrix, WIDTH);
// Record the stop event
hipEventRecord(stop, NULL);
hipEventSynchronize(stop);
checkHipErrors(hipEventRecord(stop, NULL));
checkHipErrors(hipEventSynchronize(stop));
hipEventElapsedTime(&eventMs, start, stop);
checkHipErrors(hipEventElapsedTime(&eventMs, start, stop));
printf("kernel Execution time = %6.3fms\n", eventMs);
// Record the start event
hipEventRecord(start, NULL);
checkHipErrors(hipEventRecord(start, NULL));
// Memory transfer from device to host
hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost);
checkHipErrors(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost));
// Record the stop event
hipEventRecord(stop, NULL);
hipEventSynchronize(stop);
checkHipErrors(hipEventRecord(stop, NULL));
checkHipErrors(hipEventSynchronize(stop));
hipEventElapsedTime(&eventMs, start, stop);
checkHipErrors(hipEventElapsedTime(&eventMs, start, stop));
printf("hipMemcpyDeviceToHost time taken = %6.3fms\n", eventMs);
@@ -147,8 +148,8 @@ int main() {
}
// free the resources on device side
hipFree(gpuMatrix);
hipFree(gpuTransposeMatrix);
checkHipErrors(hipFree(gpuMatrix));
checkHipErrors(hipFree(gpuTransposeMatrix));
// free the resources on host side
free(Matrix);
@@ -50,5 +50,7 @@ add_custom_target(
add_dependencies(texture2dDrv codeobj)
target_include_directories(texture2dDrv PRIVATE ../../common)
# Link with HIP
target_link_libraries(texture2dDrv hip::host)
@@ -27,11 +27,12 @@ ifeq (,$(HIP_PATH))
endif
HIPCC=$(HIP_PATH)/bin/hipcc
HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --compiler)
INCLUDES := -I../../common
all: tex2dKernel.code texture2dDrv.out
texture2dDrv.out: texture2dDrv.cpp
$(HIPCC) $(HIPCC_FLAGS) $< -o $@
$(HIPCC) $(HIPCC_FLAGS) $(INCLUDES) $< -o $@
tex2dKernel.code: tex2dKernel.cpp
$(HIPCC) --genco $(GENCO_FLAGS) $^ -o $@
@@ -24,21 +24,12 @@ THE SOFTWARE.
#include <iostream>
#include <fstream>
#include <vector>
#include "hip_helper.h"
#define fileName "tex2dKernel.code"
bool testResult = true;
#define HIP_CHECK(cmd) \
{ \
hipError_t status = cmd; \
if (status != hipSuccess) { \
std::cout << "error: #" << status << " (" << hipGetErrorString(status) \
<< ") at line:" << __LINE__ << ": " << #cmd << std::endl; \
abort(); \
} \
}
template<typename T,
typename std::enable_if<std::is_arithmetic<T>::value>::type *t = nullptr>
static inline hipArray_Format getArrayFormat() {
@@ -154,11 +145,11 @@ bool runTest(hipModule_t &module, const char *refName, const char *funcName) {
hipChannelFormatDesc channelDesc = hipCreateChannelDesc<T>();
hipArray_t array;
HIP_CHECK(hipMallocArray(&array, &channelDesc, width, height));
checkHipErrors(hipMallocArray(&array, &channelDesc, width, height));
const size_t spitch = width * sizeof(T);
HIP_CHECK(hipMemcpy2DToArray(array, 0, 0, hData, spitch, width * sizeof(T),
checkHipErrors(hipMemcpy2DToArray(array, 0, 0, hData, spitch, width * sizeof(T),
height, hipMemcpyHostToDevice));
hipResourceDesc resDesc;
@@ -175,10 +166,10 @@ bool runTest(hipModule_t &module, const char *refName, const char *funcName) {
texDesc.normalizedCoords = 0;
hipTextureObject_t texObj;
HIP_CHECK(hipCreateTextureObject(&texObj, &resDesc, &texDesc, nullptr));
checkHipErrors(hipCreateTextureObject(&texObj, &resDesc, &texDesc, nullptr));
T *dData = NULL;
HIP_CHECK(hipMalloc((void** )&dData, size));
checkHipErrors(hipMalloc((void** )&dData, size));
struct {
void *_Ad;
@@ -197,18 +188,18 @@ bool runTest(hipModule_t &module, const char *refName, const char *funcName) {
HIP_LAUNCH_PARAM_BUFFER_SIZE, &sizeTemp, HIP_LAUNCH_PARAM_END };
hipFunction_t Function;
HIP_CHECK(hipModuleGetFunction(&Function, module, funcName));
checkHipErrors(hipModuleGetFunction(&Function, module, funcName));
int temp1 = width / 16;
int temp2 = height / 16;
HIP_CHECK(
checkHipErrors(
hipModuleLaunchKernel(Function, 16, 16, 1, temp1, temp2, 1, 0, 0, NULL,
(void** )&config));
HIP_CHECK(hipDeviceSynchronize());
checkHipErrors(hipDeviceSynchronize());
T *hOutputData = (T*) malloc(size);
memset(hOutputData, 0, size);
HIP_CHECK(hipMemcpy(hOutputData, dData, size, hipMemcpyDeviceToHost));
checkHipErrors(hipMemcpy(hOutputData, dData, size, hipMemcpyDeviceToHost));
for (int i = 0; i < height; i++) {
for (int j = 0; j < width; j++) {
@@ -219,9 +210,9 @@ bool runTest(hipModule_t &module, const char *refName, const char *funcName) {
}
}
}
HIP_CHECK(hipDestroyTextureObject(texObj));
HIP_CHECK(hipFree(dData));
HIP_CHECK(hipFreeArray(array));
checkHipErrors(hipDestroyTextureObject(texObj));
checkHipErrors(hipFree(dData));
checkHipErrors(hipFreeArray(array));
free(hOutputData);
free(hData);
printf("%s test %s ...\n", funcName, testResult ? "PASSED" : "FAILED");
@@ -231,7 +222,7 @@ bool runTest(hipModule_t &module, const char *refName, const char *funcName) {
inline bool isImageSupported() {
int imageSupport = 1;
#ifdef __HIP_PLATFORM_AMD__
HIP_CHECK(hipDeviceGetAttribute(&imageSupport, hipDeviceAttributeImageSupport,
checkHipErrors(hipDeviceGetAttribute(&imageSupport, hipDeviceAttributeImageSupport,
0));
#endif
return imageSupport != 0;
@@ -242,10 +233,10 @@ int main(int argc, char** argv) {
printf("Texture is not support on the device. Skipped.\n");
return 0;
}
HIP_CHECK(hipInit(0));
HIP_CHECK(hipSetDevice(0));
checkHipErrors(hipInit(0));
checkHipErrors(hipSetDevice(0));
hipModule_t module;
HIP_CHECK(hipModuleLoad(&module, fileName));
checkHipErrors(hipModuleLoad(&module, fileName));
testResult = testResult && runTest<char>(module, "texChar", "tex2dKernelChar");
testResult = testResult && runTest<short>(module, "texShort", "tex2dKernelShort");
testResult = testResult && runTest<int>(module, "texInt", "tex2dKernelInt");
@@ -255,7 +246,7 @@ int main(int argc, char** argv) {
testResult = testResult && runTest<int4>(module, "texInt4", "tex2dKernelInt4");
testResult = testResult && runTest<float4>(module, "texFloat4", "tex2dKernelFloat4");
HIP_CHECK(hipModuleUnload(module));
checkHipErrors(hipModuleUnload(module));
printf("texture2dDrv %s ...\n", testResult ? "PASSED" : "FAILED");
return testResult ? EXIT_SUCCESS : EXIT_FAILURE;
}
@@ -51,6 +51,8 @@ set(MY_NVCC_OPTIONS)
# Tag the sample sources with HIP_SOURCE_PROPERTY_FORMAT before creating the target
# (presumably so the HIP CMake helpers treat them as HIP sources; confirm against FindHIP).
set_source_files_properties(${MY_SOURCE_FILES} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
# Create the executable, forwarding the per-compiler option lists defined above.
hip_add_executable(${MY_TARGET_NAME} ${MY_SOURCE_FILES} HIPCC_OPTIONS ${MY_HIPCC_OPTIONS} CLANG_OPTIONS ${MY_CLANG_OPTIONS} NVCC_OPTIONS ${MY_NVCC_OPTIONS})
# Add ../../common to this target's private include path
# (presumably where the shared hip_helper.h lives; verify the directory layout).
target_include_directories(${MY_TARGET_NAME} PRIVATE ../../common)
# Search for rocm in common locations
list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}/hip ${ROCM_PATH})
# QUIET: no diagnostic is printed if the hip package config is not found.
find_package(hip QUIET CONFIG)
@@ -24,6 +24,7 @@ THE SOFTWARE.
// hip header file
#include "hip/hip_runtime.h"
#include "hip_helper.h"
#define WIDTH 1024
@@ -61,7 +62,7 @@ int main() {
float* gpuTransposeMatrix;
hipDeviceProp_t devProp;
hipGetDeviceProperties(&devProp, 0);
checkHipErrors(hipGetDeviceProperties(&devProp, 0));
std::cout << "Device name " << devProp.name << std::endl;
@@ -78,11 +79,11 @@ int main() {
}
// allocate the memory on the device side
hipMalloc((void**)&gpuMatrix, NUM * sizeof(float));
hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float));
checkHipErrors(hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)));
checkHipErrors(hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)));
// Memory transfer from host to device
hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice);
checkHipErrors(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice));
// Launching kernel from host
hipLaunchKernelGGL(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y),
@@ -90,7 +91,7 @@ int main() {
gpuMatrix, WIDTH);
// Memory transfer from device to host
hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost);
checkHipErrors(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost));
// CPU MatrixTranspose computation
matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
@@ -110,8 +111,8 @@ int main() {
}
// free the resources on device side
hipFree(gpuMatrix);
hipFree(gpuTransposeMatrix);
checkHipErrors(hipFree(gpuMatrix));
checkHipErrors(hipFree(gpuTransposeMatrix));
// free the resources on host side
free(Matrix);
@@ -40,5 +40,7 @@ set(CMAKE_BUILD_TYPE Release)
# Create the executable
add_executable(occupancy occupancy.cpp)
# Add the shared sample directory to this target's include path only.
# The path is anchored to this CMakeLists.txt so it is explicit where it resolves.
target_include_directories(occupancy PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../../common")
# Link with HIP; PRIVATE because nothing consumes an executable's link interface.
target_link_libraries(occupancy PRIVATE hip::host)
+2 -2
View File
@@ -26,7 +26,7 @@ ifeq (,$(HIP_PATH))
HIP_PATH=../../..
endif
HIPCC=$(HIP_PATH)/bin/hipcc
INCLUDES := -I../../common
EXE=./occupancy
.PHONY: test
@@ -34,7 +34,7 @@ EXE=./occupancy
all: test
$(EXE): occupancy.cpp
$(HIPCC) $^ -o $@
$(HIPCC) $(INCLUDES) $^ -o $@
test: $(EXE)
$(EXE)
+22 -27
View File
@@ -19,14 +19,9 @@ THE SOFTWARE.
#include "hip/hip_runtime.h"
#include <iostream>
#include "hip_helper.h"
#define NUM 1000000
#define HIP_CHECK(status) \
if (status != hipSuccess) { \
std::cout << "Got Status: " << status << " at Line: " << __LINE__ << std::endl; \
exit(0); \
}
// Device (Kernel) function
__global__ void multiply(float* C, float* A, float* B, int N){
@@ -47,11 +42,11 @@ void multiplyCPU(float* C, float* A, float* B, int N){
void launchKernel(float* C, float* A, float* B, bool manual){
hipDeviceProp_t devProp;
HIP_CHECK(hipGetDeviceProperties(&devProp, 0));
checkHipErrors(hipGetDeviceProperties(&devProp, 0));
hipEvent_t start, stop;
HIP_CHECK(hipEventCreate(&start));
HIP_CHECK(hipEventCreate(&stop));
checkHipErrors(hipEventCreate(&start));
checkHipErrors(hipEventCreate(&stop));
float eventMs = 1.0f;
const unsigned threadsperblock = 32;
const unsigned blocks = (NUM/threadsperblock)+1;
@@ -66,28 +61,28 @@ void launchKernel(float* C, float* A, float* B, bool manual){
std::cout << std::endl << "Manual Configuration with block size " << blockSize << std::endl;
}
else{
HIP_CHECK(hipOccupancyMaxPotentialBlockSize(&mingridSize, &blockSize, multiply, 0, 0));
checkHipErrors(hipOccupancyMaxPotentialBlockSize(&mingridSize, &blockSize, multiply, 0, 0));
std::cout << std::endl << "Automatic Configuation based on hipOccupancyMaxPotentialBlockSize " << std::endl;
std::cout << "Suggested blocksize is " << blockSize << ", Minimum gridsize is " << mingridSize << std::endl;
gridSize = (NUM/blockSize)+1;
}
// Record the start event
HIP_CHECK(hipEventRecord(start, NULL));
checkHipErrors(hipEventRecord(start, NULL));
// Launching the Kernel from Host
hipLaunchKernelGGL(multiply, dim3(gridSize), dim3(blockSize), 0, 0, C, A, B, NUM);
// Record the stop event
HIP_CHECK(hipEventRecord(stop, NULL));
HIP_CHECK(hipEventSynchronize(stop));
checkHipErrors(hipEventRecord(stop, NULL));
checkHipErrors(hipEventSynchronize(stop));
HIP_CHECK(hipEventElapsedTime(&eventMs, start, stop));
checkHipErrors(hipEventElapsedTime(&eventMs, start, stop));
printf("kernel Execution time = %6.3fms\n", eventMs);
//Calculate Occupancy
int numBlock = 0;
HIP_CHECK(hipOccupancyMaxActiveBlocksPerMultiprocessor(&numBlock, multiply, blockSize, 0));
checkHipErrors(hipOccupancyMaxActiveBlocksPerMultiprocessor(&numBlock, multiply, blockSize, 0));
if(devProp.maxThreadsPerMultiProcessor){
std::cout << "Theoretical Occupancy is " << (double)numBlock* blockSize/devProp.maxThreadsPerMultiProcessor * 100 << "%" << std::endl;
@@ -113,14 +108,14 @@ int main() {
}
// allocate the memory on the device side
HIP_CHECK(hipMalloc((void**)&Ad, NUM * sizeof(float)));
HIP_CHECK(hipMalloc((void**)&Bd, NUM * sizeof(float)));
HIP_CHECK(hipMalloc((void**)&C0d, NUM * sizeof(float)));
HIP_CHECK(hipMalloc((void**)&C1d, NUM * sizeof(float)));
checkHipErrors(hipMalloc((void**)&Ad, NUM * sizeof(float)));
checkHipErrors(hipMalloc((void**)&Bd, NUM * sizeof(float)));
checkHipErrors(hipMalloc((void**)&C0d, NUM * sizeof(float)));
checkHipErrors(hipMalloc((void**)&C1d, NUM * sizeof(float)));
// Memory transfer from host to device
HIP_CHECK(hipMemcpy(Ad,A,NUM * sizeof(float), hipMemcpyHostToDevice));
HIP_CHECK(hipMemcpy(Bd,B,NUM * sizeof(float), hipMemcpyHostToDevice));
checkHipErrors(hipMemcpy(Ad,A,NUM * sizeof(float), hipMemcpyHostToDevice));
checkHipErrors(hipMemcpy(Bd,B,NUM * sizeof(float), hipMemcpyHostToDevice));
//Kernel launch with manual/default block size
launchKernel(C0d, Ad, Bd, 1);
@@ -129,8 +124,8 @@ int main() {
launchKernel(C1d, Ad, Bd, 0);
// Memory transfer from device to host
HIP_CHECK(hipMemcpy(C0,C0d, NUM * sizeof(float), hipMemcpyDeviceToHost));
HIP_CHECK(hipMemcpy(C1,C1d, NUM * sizeof(float), hipMemcpyDeviceToHost));
checkHipErrors(hipMemcpy(C0,C0d, NUM * sizeof(float), hipMemcpyDeviceToHost));
checkHipErrors(hipMemcpy(C1,C1d, NUM * sizeof(float), hipMemcpyDeviceToHost));
// CPU computation
multiplyCPU(cpuC, A, B, NUM);
@@ -163,10 +158,10 @@ int main() {
printf("\nAutomatic Test PASSED!\n");
}
HIP_CHECK(hipFree(Ad));
HIP_CHECK(hipFree(Bd));
HIP_CHECK(hipFree(C0d));
HIP_CHECK(hipFree(C1d));
checkHipErrors(hipFree(Ad));
checkHipErrors(hipFree(Bd));
checkHipErrors(hipFree(C0d));
checkHipErrors(hipFree(C1d));
free(A);
free(B);
@@ -40,5 +40,7 @@ set(CMAKE_BUILD_TYPE Release)
# Create the executable
add_executable(gpuarch gpuarch.cpp)
# Add the shared sample directory to this target's include path only.
# The path is anchored to this CMakeLists.txt so it is explicit where it resolves.
target_include_directories(gpuarch PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../../common")
# Link with HIP; PRIVATE because nothing consumes an executable's link interface.
target_link_libraries(gpuarch PRIVATE hip::host)
+2 -2
View File
@@ -26,7 +26,7 @@ ifeq (,$(HIP_PATH))
HIP_PATH=../../..
endif
HIPCC=$(HIP_PATH)/bin/hipcc
INCLUDES := -I../../common
EXE=./gpuarch
.PHONY: test
@@ -34,7 +34,7 @@ EXE=./gpuarch
all: test
$(EXE): gpuarch.cpp
$(HIPCC) $^ -o $@
$(HIPCC) $(INCLUDES) $^ -o $@
test: $(EXE)
$(EXE)
+4 -10
View File
@@ -25,12 +25,6 @@ THE SOFTWARE.
#define SIZE (BLOCKS_PER_GRID * THREADS_PER_BLOCK)
#define NOT_SUPPORTED -99 // dummy number indicates unsupported operation
#define HIP_STATUS_CHECK(status) \
if (status != hipSuccess) { \
std::cout << "Got Status: " << status << " at Line: " << __LINE__ << std::endl; \
exit(0); \
}
// Using __gfx*__ macro one can have GPU architecture specific code flow
// For example: If below kernel runs on gfx908 it will increment 'in' by 'value' and store into
// 'out'
@@ -57,8 +51,8 @@ int main() {
int32_t* hInput = static_cast<int32_t*>(malloc(NBytes));
int32_t* hOutput = static_cast<int32_t*>(malloc(NBytes));
HIP_STATUS_CHECK(hipMalloc(&dInput, NBytes));
HIP_STATUS_CHECK(hipMalloc(&dOutput, NBytes));
checkHipErrors(hipMalloc(&dInput, NBytes));
checkHipErrors(hipMalloc(&dOutput, NBytes));
// Initialize host input/output buffers
for (int i = 0; i < SIZE; ++i) {
@@ -67,14 +61,14 @@ int main() {
}
// Initialize device input buffer
HIP_STATUS_CHECK(hipMemcpy(dInput, hInput, NBytes, hipMemcpyHostToDevice));
checkHipErrors(hipMemcpy(dInput, hInput, NBytes, hipMemcpyHostToDevice));
// Launch kernel
hipLaunchKernelGGL(incrementKernel, dim3(BLOCKS_PER_GRID), dim3(THREADS_PER_BLOCK), 0, 0, dInput,
dOutput, incrementValue, SIZE);
// Copy result back to host buffer
HIP_STATUS_CHECK(hipMemcpy(hOutput, dOutput, NBytes, hipMemcpyDeviceToHost));
checkHipErrors(hipMemcpy(hOutput, dOutput, NBytes, hipMemcpyDeviceToHost));
bool flag = true;
// verify data
@@ -30,6 +30,7 @@ HIPCC=$(HIP_PATH)/bin/hipcc
CLANG=$(HIP_PATH)/llvm/bin/clang
LLVM_MC=$(HIP_PATH)/llvm/bin/llvm-mc
CLANG_OFFLOAD_BUNDLER=$(HIP_PATH)/llvm/bin/clang-offload-bundler
INCLUDES := -I../../common
SRCS=square.cpp
@@ -57,8 +58,8 @@ GPU_ARCH9=gfx1103
all: src_to_asm asm_to_exec
src_to_asm:
$(HIPCC) -c -S --cuda-host-only -target x86_64-linux-gnu -o $(SQ_HOST_ASM) $(SRCS)
$(HIPCC) -c -S --cuda-device-only --offload-arch=$(GPU_ARCH1) --offload-arch=$(GPU_ARCH2) --offload-arch=$(GPU_ARCH3) --offload-arch=$(GPU_ARCH4) --offload-arch=$(GPU_ARCH5) --offload-arch=$(GPU_ARCH6) --offload-arch=$(GPU_ARCH7) --offload-arch=$(GPU_ARCH8) --offload-arch=$(GPU_ARCH9) $(SRCS)
$(HIPCC) -c -S $(INCLUDES) --cuda-host-only -target x86_64-linux-gnu -o $(SQ_HOST_ASM) $(SRCS)
$(HIPCC) -c -S $(INCLUDES) --cuda-device-only --offload-arch=$(GPU_ARCH1) --offload-arch=$(GPU_ARCH2) --offload-arch=$(GPU_ARCH3) --offload-arch=$(GPU_ARCH4) --offload-arch=$(GPU_ARCH5) --offload-arch=$(GPU_ARCH6) --offload-arch=$(GPU_ARCH7) --offload-arch=$(GPU_ARCH8) --offload-arch=$(GPU_ARCH9) $(SRCS)
# You may modify the .s assembly files before the next step
# By default, their names will be:
@@ -19,15 +19,7 @@ THE SOFTWARE.
#include <stdio.h>
#include <hip/hip_runtime.h>
#define CHECK(cmd) \
{\
hipError_t error = cmd;\
if (error != hipSuccess) { \
fprintf(stderr, "error: '%s'(%d) at %s:%d\n", hipGetErrorString(error), error,__FILE__, __LINE__); \
exit(EXIT_FAILURE);\
}\
}
#include "hip_helper.h"
/* This kernel is a placeholder for the kernel in assembly generated by this
* sample. It will be replaced by the kernel in assembly.
@@ -55,14 +47,14 @@ int main(int argc, char *argv[])
size_t Nbytes = N * sizeof(float);
hipDeviceProp_t props;
CHECK(hipGetDeviceProperties(&props, 0/*deviceID*/));
checkHipErrors(hipGetDeviceProperties(&props, 0/*deviceID*/));
printf ("info: running on device %s\n", props.name);
printf ("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
A_h = (float*)malloc(Nbytes);
CHECK(A_h == 0 ? hipErrorMemoryAllocation : hipSuccess );
checkHipErrors(A_h == 0 ? hipErrorMemoryAllocation : hipSuccess );
C_h = (float*)malloc(Nbytes);
CHECK(C_h == 0 ? hipErrorMemoryAllocation : hipSuccess );
checkHipErrors(C_h == 0 ? hipErrorMemoryAllocation : hipSuccess );
// Fill with Phi + i
for (size_t i=0; i<N; i++)
{
@@ -70,12 +62,12 @@ int main(int argc, char *argv[])
}
printf ("info: allocate device mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
CHECK(hipMalloc(&A_d, Nbytes));
CHECK(hipMalloc(&C_d, Nbytes));
checkHipErrors(hipMalloc(&A_d, Nbytes));
checkHipErrors(hipMalloc(&C_d, Nbytes));
printf ("info: copy Host2Device\n");
CHECK ( hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
checkHipErrors ( hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
const unsigned blocks = 512;
const unsigned threadsPerBlock = 256;
@@ -84,12 +76,12 @@ int main(int argc, char *argv[])
vector_square <<<blocks, threadsPerBlock>>> (C_d, A_d, N);
printf ("info: copy Device2Host\n");
CHECK ( hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
checkHipErrors ( hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
// Announce the verification phase. The message is user-facing log text and
// must not pick up the checkHipErrors helper name from the macro rename.
printf ("info: check result\n");
for (size_t i=0; i<N; i++) {
if (C_h[i] != A_h[i] * A_h[i]) {
CHECK(hipErrorUnknown);
checkHipErrors(hipErrorUnknown);
}
}
printf ("PASSED!\n");
@@ -32,6 +32,7 @@ LLVM_MC=$(HIP_PATH)/llvm/bin/llvm-mc
CLANG_OFFLOAD_BUNDLER=$(HIP_PATH)/llvm/bin/clang-offload-bundler
LLVM_AS=$(HIP_PATH)/llvm/bin/llvm-as
LLVM_DIS=$(HIP_PATH)/llvm/bin/llvm-dis
INCLUDES := -I../../common
SRCS=square.cpp
@@ -60,8 +61,8 @@ GPU_ARCH9=gfx1103
all: src_to_ir bc_to_ll ll_to_bc ir_to_exec
src_to_ir:
$(HIPCC) -c -emit-llvm --cuda-host-only -target x86_64-linux-gnu -o $(SQ_HOST_BC) $(SRCS)
$(HIPCC) -c -emit-llvm --cuda-device-only --offload-arch=$(GPU_ARCH1) --offload-arch=$(GPU_ARCH2) --offload-arch=$(GPU_ARCH3) --offload-arch=$(GPU_ARCH4) --offload-arch=$(GPU_ARCH5) --offload-arch=$(GPU_ARCH6) --offload-arch=$(GPU_ARCH7) --offload-arch=$(GPU_ARCH8) --offload-arch=$(GPU_ARCH9) $(SRCS)
$(HIPCC) $(INCLUDES) -c -emit-llvm --cuda-host-only -target x86_64-linux-gnu -o $(SQ_HOST_BC) $(SRCS)
$(HIPCC) $(INCLUDES) -c -emit-llvm --cuda-device-only --offload-arch=$(GPU_ARCH1) --offload-arch=$(GPU_ARCH2) --offload-arch=$(GPU_ARCH3) --offload-arch=$(GPU_ARCH4) --offload-arch=$(GPU_ARCH5) --offload-arch=$(GPU_ARCH6) --offload-arch=$(GPU_ARCH7) --offload-arch=$(GPU_ARCH8) --offload-arch=$(GPU_ARCH9) $(SRCS)
# By default, the LLVM IR Bitcode file names will be:
# square-hip-amdgcn-amd-amdhsa-gfx900.bc
@@ -19,15 +19,7 @@ THE SOFTWARE.
#include <stdio.h>
#include <hip/hip_runtime.h>
#define CHECK(cmd) \
{\
hipError_t error = cmd;\
if (error != hipSuccess) { \
fprintf(stderr, "error: '%s'(%d) at %s:%d\n", hipGetErrorString(error), error,__FILE__, __LINE__); \
exit(EXIT_FAILURE);\
}\
}
#include "hip_helper.h"
/* This kernel is a placeholder for the kernel in LLVM IR generated by this
* sample. It will be replaced by the kernel in LLVM IR.
@@ -55,14 +47,14 @@ int main(int argc, char *argv[])
size_t Nbytes = N * sizeof(float);
hipDeviceProp_t props;
CHECK(hipGetDeviceProperties(&props, 0/*deviceID*/));
checkHipErrors(hipGetDeviceProperties(&props, 0/*deviceID*/));
printf ("info: running on device %s\n", props.name);
printf ("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
A_h = (float*)malloc(Nbytes);
CHECK(A_h == 0 ? hipErrorMemoryAllocation : hipSuccess );
checkHipErrors(A_h == 0 ? hipErrorMemoryAllocation : hipSuccess );
C_h = (float*)malloc(Nbytes);
CHECK(C_h == 0 ? hipErrorMemoryAllocation : hipSuccess );
checkHipErrors(C_h == 0 ? hipErrorMemoryAllocation : hipSuccess );
// Fill with Phi + i
for (size_t i=0; i<N; i++)
{
@@ -70,12 +62,12 @@ int main(int argc, char *argv[])
}
printf ("info: allocate device mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
CHECK(hipMalloc(&A_d, Nbytes));
CHECK(hipMalloc(&C_d, Nbytes));
checkHipErrors(hipMalloc(&A_d, Nbytes));
checkHipErrors(hipMalloc(&C_d, Nbytes));
printf ("info: copy Host2Device\n");
CHECK ( hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
checkHipErrors ( hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
const unsigned blocks = 512;
const unsigned threadsPerBlock = 256;
@@ -84,12 +76,12 @@ int main(int argc, char *argv[])
vector_square <<<blocks, threadsPerBlock>>> (C_d, A_d, N);
printf ("info: copy Device2Host\n");
CHECK ( hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
checkHipErrors ( hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
// Announce the verification phase. The message is user-facing log text and
// must not pick up the checkHipErrors helper name from the macro rename.
printf ("info: check result\n");
for (size_t i=0; i<N; i++) {
if (C_h[i] != A_h[i] * A_h[i]) {
CHECK(hipErrorUnknown);
checkHipErrors(hipErrorUnknown);
}
}
printf ("PASSED!\n");
@@ -22,11 +22,15 @@ project(cmake_hip_device_test)
# NOTE(review): the surrounding hunk context suggests project() is called before
# this line; CMake expects cmake_minimum_required() first. Confirm and reorder.
cmake_minimum_required(VERSION 3.10.2)
# Find hip
find_package(hip REQUIRED)
# Create the executable
add_executable(test_cpp square.cpp)
# Add the shared sample directory to this target's include path. This is kept
# target-scoped (no directory-wide include_directories) so it does not leak to
# any other target defined in this directory.
target_include_directories(test_cpp PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../../common")
# Link with HIP; PRIVATE because nothing consumes an executable's link interface.
target_link_libraries(test_cpp PRIVATE hip::device)
@@ -22,16 +22,7 @@ THE SOFTWARE.
#include <stdio.h>
#include <hip/hip_runtime.h>
#define CHECK(cmd) \
{\
hipError_t error = cmd;\
if (error != hipSuccess) { \
fprintf(stderr, "error: '%s'(%d) at %s:%d\n", hipGetErrorString(error), error,__FILE__, __LINE__); \
exit(EXIT_FAILURE);\
}\
}
#include "hip_helper.h"
/*
* Square each element in the array A and write to array C.
@@ -57,14 +48,14 @@ int main(int argc, char *argv[])
size_t Nbytes = N * sizeof(float);
hipDeviceProp_t props;
CHECK(hipGetDeviceProperties(&props, 0/*deviceID*/));
checkHipErrors(hipGetDeviceProperties(&props, 0/*deviceID*/));
printf ("info: running on device %s\n", props.name);
printf ("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
A_h = (float*)malloc(Nbytes);
CHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess );
checkHipErrors(A_h == 0 ? hipErrorOutOfMemory : hipSuccess );
C_h = (float*)malloc(Nbytes);
CHECK(C_h == 0 ? hipErrorOutOfMemory : hipSuccess );
checkHipErrors(C_h == 0 ? hipErrorOutOfMemory : hipSuccess );
// Fill with Phi + i
for (size_t i=0; i<N; i++)
{
@@ -72,12 +63,12 @@ int main(int argc, char *argv[])
}
printf ("info: allocate device mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
CHECK(hipMalloc(&A_d, Nbytes));
CHECK(hipMalloc(&C_d, Nbytes));
checkHipErrors(hipMalloc(&A_d, Nbytes));
checkHipErrors(hipMalloc(&C_d, Nbytes));
printf ("info: copy Host2Device\n");
CHECK ( hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
checkHipErrors ( hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
const unsigned blocks = 512;
const unsigned threadsPerBlock = 256;
@@ -86,12 +77,12 @@ int main(int argc, char *argv[])
hipLaunchKernelGGL(vector_square, dim3(blocks), dim3(threadsPerBlock), 0, 0, C_d, A_d, N);
printf ("info: copy Device2Host\n");
CHECK ( hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
checkHipErrors ( hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
// Announce the verification phase. The message is user-facing log text and
// must not pick up the checkHipErrors helper name from the macro rename.
printf ("info: check result\n");
for (size_t i=0; i<N; i++) {
if (C_h[i] != A_h[i] * A_h[i]) {
CHECK(hipErrorUnknown);
checkHipErrors(hipErrorUnknown);
}
}
printf ("PASSED!\n");
@@ -10,5 +10,8 @@ add_executable(test_fortran TestFortran.F90)
# C++ sample executable built from MatrixTranspose.cpp.
add_executable(test_cpp MatrixTranspose.cpp)
target_link_libraries(test_cpp PUBLIC hip::device)
# Add ../../common to this target's private include path
# (presumably where the shared hip_helper.h lives; verify the directory layout).
target_include_directories(test_cpp PRIVATE ../../common)
# Assuming to build a C/C++-to-Fortran library binding.
target_link_libraries(test_fortran PUBLIC hip::device)
@@ -24,7 +24,7 @@ THE SOFTWARE.
// hip header file
#include "hip/hip_runtime.h"
#include "hip_helper.h"
#define WIDTH 1024
@@ -61,7 +61,7 @@ int main() {
float* gpuTransposeMatrix;
hipDeviceProp_t devProp;
hipGetDeviceProperties(&devProp, 0);
checkHipErrors(hipGetDeviceProperties(&devProp, 0));
std::cout << "Device name " << devProp.name << std::endl;
@@ -78,11 +78,11 @@ int main() {
}
// allocate the memory on the device side
hipMalloc((void**)&gpuMatrix, NUM * sizeof(float));
hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float));
checkHipErrors(hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)));
checkHipErrors(hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)));
// Memory transfer from host to device
hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice);
checkHipErrors(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice));
// Launching kernel from host
hipLaunchKernelGGL(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y),
@@ -90,7 +90,7 @@ int main() {
gpuMatrix, WIDTH);
// Memory transfer from device to host
hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost);
checkHipErrors(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost));
// CPU MatrixTranspose computation
matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
@@ -110,8 +110,8 @@ int main() {
}
// free the resources on device side
hipFree(gpuMatrix);
hipFree(gpuTransposeMatrix);
checkHipErrors(hipFree(gpuMatrix));
checkHipErrors(hipFree(gpuTransposeMatrix));
// free the resources on host side
free(Matrix);
@@ -40,5 +40,7 @@ set(CMAKE_BUILD_TYPE Release)
# Create the executable
add_executable(hipEvent hipEvent.cpp)
# Add the shared sample directory to this target's include path only.
# The path is anchored to this CMakeLists.txt so it is explicit where it resolves.
target_include_directories(hipEvent PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../../common")
# Link with HIP; PRIVATE because nothing consumes an executable's link interface.
target_link_libraries(hipEvent PRIVATE hip::host)
+2 -1
View File
@@ -30,6 +30,7 @@ TARGET=hcc
SOURCES = hipEvent.cpp
OBJECTS = $(SOURCES:.cpp=.o)
INCLUDES := -I../../common
EXECUTABLE=./hipEvent
@@ -38,7 +39,7 @@ EXECUTABLE=./hipEvent
all: $(EXECUTABLE) test
CXXFLAGS =-g
CXXFLAGS =-g $(INCLUDES)
CXX=$(HIPCC)
+22 -21
View File
@@ -24,6 +24,7 @@ THE SOFTWARE.
// hip header file
#include "hip/hip_runtime.h"
#include "hip_helper.h"
#define WIDTH 1024
@@ -59,13 +60,13 @@ int main() {
float* gpuTransposeMatrix;
hipDeviceProp_t devProp;
hipGetDeviceProperties(&devProp, 0);
checkHipErrors(hipGetDeviceProperties(&devProp, 0));
std::cout << "Device name " << devProp.name << std::endl;
hipEvent_t start, stop;
hipEventCreate(&start);
hipEventCreate(&stop);
checkHipErrors(hipEventCreate(&start));
checkHipErrors(hipEventCreate(&stop));
float eventMs = 1.0f;
int i;
@@ -81,25 +82,25 @@ int main() {
}
// allocate the memory on the device side
hipMalloc((void**)&gpuMatrix, NUM * sizeof(float));
hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float));
checkHipErrors(hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)));
checkHipErrors(hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)));
// Record the start event
hipEventRecord(start, NULL);
checkHipErrors(hipEventRecord(start, NULL));
// Memory transfer from host to device
hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice);
checkHipErrors(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice));
// Record the stop event
hipEventRecord(stop, NULL);
hipEventSynchronize(stop);
checkHipErrors(hipEventRecord(stop, NULL));
checkHipErrors(hipEventSynchronize(stop));
hipEventElapsedTime(&eventMs, start, stop);
checkHipErrors(hipEventElapsedTime(&eventMs, start, stop));
printf("hipMemcpyHostToDevice time taken = %6.3fms\n", eventMs);
// Record the start event
hipEventRecord(start, NULL);
checkHipErrors(hipEventRecord(start, NULL));
// Launching kernel from host
hipLaunchKernelGGL(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y),
@@ -107,24 +108,24 @@ int main() {
gpuMatrix, WIDTH);
// Record the stop event
hipEventRecord(stop, NULL);
hipEventSynchronize(stop);
checkHipErrors(hipEventRecord(stop, NULL));
checkHipErrors(hipEventSynchronize(stop));
hipEventElapsedTime(&eventMs, start, stop);
checkHipErrors(hipEventElapsedTime(&eventMs, start, stop));
printf("kernel Execution time = %6.3fms\n", eventMs);
// Record the start event
hipEventRecord(start, NULL);
checkHipErrors(hipEventRecord(start, NULL));
// Memory transfer from device to host
hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost);
checkHipErrors(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost));
// Record the stop event
hipEventRecord(stop, NULL);
hipEventSynchronize(stop);
checkHipErrors(hipEventRecord(stop, NULL));
checkHipErrors(hipEventSynchronize(stop));
hipEventElapsedTime(&eventMs, start, stop);
checkHipErrors(hipEventElapsedTime(&eventMs, start, stop));
printf("hipMemcpyDeviceToHost time taken = %6.3fms\n", eventMs);
@@ -146,8 +147,8 @@ int main() {
}
// free the resources on device side
hipFree(gpuMatrix);
hipFree(gpuTransposeMatrix);
checkHipErrors(hipFree(gpuMatrix));
checkHipErrors(hipFree(gpuTransposeMatrix));
// free the resources on host side
free(Matrix);
@@ -30,5 +30,7 @@ find_package(hip REQUIRED)
# Create the executable
add_executable(square square.cpp)
# Add the shared sample directory to this target's include path only.
# The path is anchored to this CMakeLists.txt so it is explicit where it resolves.
target_include_directories(square PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../../common")
# Link with HIP; PRIVATE because nothing consumes an executable's link interface.
target_link_libraries(square PRIVATE hip::device)
@@ -22,16 +22,7 @@ THE SOFTWARE.
#include <stdio.h>
#include <hip/hip_runtime.h>
#define CHECK(cmd) \
{\
hipError_t error = cmd;\
if (error != hipSuccess) { \
fprintf(stderr, "error: '%s'(%d) at %s:%d\n", hipGetErrorString(error), error,__FILE__, __LINE__); \
exit(EXIT_FAILURE);\
}\
}
#include "hip_helper.h"
/*
* Square each element in the array A and write to array C.
@@ -57,14 +48,14 @@ int main(int argc, char *argv[])
size_t Nbytes = N * sizeof(float);
hipDeviceProp_t props;
CHECK(hipGetDeviceProperties(&props, 0/*deviceID*/));
checkHipErrors(hipGetDeviceProperties(&props, 0/*deviceID*/));
printf ("info: running on device %s\n", props.name);
printf ("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
A_h = (float*)malloc(Nbytes);
CHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess );
checkHipErrors(A_h == 0 ? hipErrorOutOfMemory : hipSuccess );
C_h = (float*)malloc(Nbytes);
CHECK(C_h == 0 ? hipErrorOutOfMemory : hipSuccess );
checkHipErrors(C_h == 0 ? hipErrorOutOfMemory : hipSuccess );
// Fill with Phi + i
for (size_t i=0; i<N; i++)
{
@@ -72,12 +63,12 @@ int main(int argc, char *argv[])
}
printf ("info: allocate device mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
CHECK(hipMalloc(&A_d, Nbytes));
CHECK(hipMalloc(&C_d, Nbytes));
checkHipErrors(hipMalloc(&A_d, Nbytes));
checkHipErrors(hipMalloc(&C_d, Nbytes));
printf ("info: copy Host2Device\n");
CHECK ( hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
checkHipErrors ( hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
const unsigned blocks = 512;
const unsigned threadsPerBlock = 256;
@@ -86,12 +77,12 @@ int main(int argc, char *argv[])
hipLaunchKernelGGL(vector_square, dim3(blocks), dim3(threadsPerBlock), 0, 0, C_d, A_d, N);
printf ("info: copy Device2Host\n");
CHECK ( hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
checkHipErrors ( hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
printf ("info: check result\n");
for (size_t i=0; i<N; i++) {
if (C_h[i] != A_h[i] * A_h[i]) {
CHECK(hipErrorUnknown);
checkHipErrors(hipErrorUnknown);
}
}
printf ("PASSED!\n");
@@ -25,3 +25,5 @@ project(cmake_hip_lang_support VERSION 1.0
LANGUAGES HIP)
# Create the executable
add_executable(square square.hip)
# Add ../../common to this target's private include path
# (presumably where the shared hip_helper.h lives; verify the directory layout).
target_include_directories(square PRIVATE ../../common)
@@ -22,16 +22,7 @@ THE SOFTWARE.
#include <stdio.h>
#include <hip/hip_runtime.h>
#define CHECK(cmd) \
{\
hipError_t error = cmd;\
if (error != hipSuccess) { \
fprintf(stderr, "error: '%s'(%d) at %s:%d\n", hipGetErrorString(error), error,__FILE__, __LINE__); \
exit(EXIT_FAILURE);\
}\
}
#include "hip_helper.h"
/*
* Square each element in the array A and write to array C.
@@ -57,14 +48,14 @@ int main(int argc, char *argv[])
size_t Nbytes = N * sizeof(float);
hipDeviceProp_t props;
CHECK(hipGetDeviceProperties(&props, 0/*deviceID*/));
checkHipErrors(hipGetDeviceProperties(&props, 0/*deviceID*/));
printf ("info: running on device %s\n", props.name);
printf ("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
A_h = (float*)malloc(Nbytes);
CHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess );
checkHipErrors(A_h == 0 ? hipErrorOutOfMemory : hipSuccess );
C_h = (float*)malloc(Nbytes);
CHECK(C_h == 0 ? hipErrorOutOfMemory : hipSuccess );
checkHipErrors(C_h == 0 ? hipErrorOutOfMemory : hipSuccess );
// Fill with Phi + i
for (size_t i=0; i<N; i++)
{
@@ -72,12 +63,12 @@ int main(int argc, char *argv[])
}
printf ("info: allocate device mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
CHECK(hipMalloc(&A_d, Nbytes));
CHECK(hipMalloc(&C_d, Nbytes));
checkHipErrors(hipMalloc(&A_d, Nbytes));
checkHipErrors(hipMalloc(&C_d, Nbytes));
printf ("info: copy Host2Device\n");
CHECK ( hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
checkHipErrors ( hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
const unsigned blocks = 512;
const unsigned threadsPerBlock = 256;
@@ -86,12 +77,12 @@ int main(int argc, char *argv[])
hipLaunchKernelGGL(vector_square, dim3(blocks), dim3(threadsPerBlock), 0, 0, C_d, A_d, N);
printf ("info: copy Device2Host\n");
CHECK ( hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
checkHipErrors ( hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
printf ("info: check result\n");
for (size_t i=0; i<N; i++) {
if (C_h[i] != A_h[i] * A_h[i]) {
CHECK(hipErrorUnknown);
checkHipErrors(hipErrorUnknown);
}
}
printf ("PASSED!\n");
@@ -34,3 +34,5 @@ add_executable(test saxpy.cpp)
# Link with hiprtc. Both link calls on this target use the PRIVATE keyword
# signature, since CMake does not allow mixing keyword and plain signatures
# on one target.
target_link_libraries(test PRIVATE hiprtc::hiprtc)
# Link with HIP
target_link_libraries(test PRIVATE hip::device)
# Add the shared sample directory to this target's include path only,
# anchored to this CMakeLists.txt so it is explicit where it resolves.
target_include_directories(test PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../../common")
+16 -15
View File
@@ -22,6 +22,7 @@ THE SOFTWARE.
#include <hip/hiprtc.h>
#include <hip/hip_runtime.h>
#include <hip_helper.h>
#include <cassert>
#include <cstddef>
@@ -69,7 +70,7 @@ int main()
hipDeviceProp_t props;
int device = 0;
hipGetDeviceProperties(&props, device);
checkHipErrors(hipGetDeviceProperties(&props, device));
const char* options[] = {};
@@ -100,8 +101,8 @@ int main()
hipModule_t module;
hipFunction_t kernel;
hipModuleLoadData(&module, code.data());
hipModuleGetFunction(&kernel, module, "saxpy");
checkHipErrors(hipModuleLoadData(&module, code.data()));
checkHipErrors(hipModuleGetFunction(&kernel, module, "saxpy"));
size_t n = NUM_THREADS * NUM_BLOCKS;
size_t bufferSize = n * sizeof(float);
@@ -117,11 +118,11 @@ int main()
}
hipDeviceptr_t dX, dY, dOut;
hipMalloc((void **)&dX, bufferSize);
hipMalloc((void **)&dY, bufferSize);
hipMalloc((void **)&dOut, bufferSize);
hipMemcpyHtoD(dX, hX.get(), bufferSize);
hipMemcpyHtoD(dY, hY.get(), bufferSize);
checkHipErrors(hipMalloc((void **)&dX, bufferSize));
checkHipErrors(hipMalloc((void **)&dY, bufferSize));
checkHipErrors(hipMalloc((void **)&dOut, bufferSize));
checkHipErrors(hipMemcpyHtoD(dX, hX.get(), bufferSize));
checkHipErrors(hipMemcpyHtoD(dY, hY.get(), bufferSize));
struct {
float a_;
@@ -136,9 +137,9 @@ int main()
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
HIP_LAUNCH_PARAM_END};
hipModuleLaunchKernel(kernel, NUM_BLOCKS, 1, 1, NUM_THREADS, 1, 1,
0, nullptr, nullptr, config);
hipMemcpyDtoH(hOut.get(), dOut, bufferSize);
checkHipErrors(hipModuleLaunchKernel(kernel, NUM_BLOCKS, 1, 1, NUM_THREADS, 1, 1,
0, nullptr, nullptr, config));
checkHipErrors(hipMemcpyDtoH(hOut.get(), dOut, bufferSize));
for (size_t i = 0; i < n; ++i) {
if (fabs(a * hX[i] + hY[i] - hOut[i]) > fabs(hOut[i])* 1e-6) {
@@ -146,11 +147,11 @@ int main()
}
}
hipFree((void *)dX);
hipFree((void *)dY);
hipFree((void *)dOut);
checkHipErrors(hipFree((void *)dX));
checkHipErrors(hipFree((void *)dY));
checkHipErrors(hipFree((void *)dOut));
hipModuleUnload(module);
checkHipErrors(hipModuleUnload(module));
cout << "SAXPY test completed" << endl;
}
@@ -40,5 +40,7 @@ set(CMAKE_BUILD_TYPE Release)
# Create the executable
add_executable(sharedMemory sharedMemory.cpp)
# Add the shared sample directory to this target's include path only.
# The path is anchored to this CMakeLists.txt so it is explicit where it resolves.
target_include_directories(sharedMemory PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../../common")
# Link with HIP; PRIVATE because nothing consumes an executable's link interface.
target_link_libraries(sharedMemory PRIVATE hip::host)
+2 -1
View File
@@ -32,6 +32,7 @@ TARGET=hcc
SOURCES = sharedMemory.cpp
OBJECTS = $(SOURCES:.cpp=.o)
INCLUDES := -I../../common
EXECUTABLE=./sharedMemory
@@ -40,7 +41,7 @@ EXECUTABLE=./sharedMemory
all: $(EXECUTABLE) test
CXXFLAGS =-g
CXXFLAGS =-g $(INCLUDES)
CXX=$(HIPCC)
@@ -24,7 +24,7 @@ THE SOFTWARE.
// hip header file
#include "hip/hip_runtime.h"
#include "hip_helper.h"
#define WIDTH 64
@@ -66,7 +66,7 @@ int main() {
float* gpuTransposeMatrix;
hipDeviceProp_t devProp;
hipGetDeviceProperties(&devProp, 0);
checkHipErrors(hipGetDeviceProperties(&devProp, 0));
std::cout << "Device name " << devProp.name << std::endl;
@@ -83,11 +83,11 @@ int main() {
}
// allocate the memory on the device side
hipMalloc((void**)&gpuMatrix, NUM * sizeof(float));
hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float));
checkHipErrors(hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)));
checkHipErrors(hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)));
// Memory transfer from host to device
hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice);
checkHipErrors(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice));
// Launching kernel from host
hipLaunchKernelGGL(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y),
@@ -95,7 +95,7 @@ int main() {
gpuMatrix, WIDTH);
// Memory transfer from device to host
hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost);
checkHipErrors(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost));
// CPU MatrixTranspose computation
matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
@@ -116,8 +116,8 @@ int main() {
}
// free the resources on device side
hipFree(gpuMatrix);
hipFree(gpuTransposeMatrix);
checkHipErrors(hipFree(gpuMatrix));
checkHipErrors(hipFree(gpuTransposeMatrix));
// free the resources on host side
free(Matrix);
+2
View File
@@ -40,5 +40,7 @@ set(CMAKE_BUILD_TYPE Release)
# Create the executable
add_executable(shfl shfl.cpp)
# Add the samples' common directory to this target's include path only
target_include_directories(shfl PRIVATE ../../common)
# Link with HIP; PRIVATE avoids the legacy keyword-less signature
target_link_libraries(shfl PRIVATE hip::host)
+2 -1
View File
@@ -36,6 +36,7 @@ TARGET=hcc
SOURCES = shfl.cpp
OBJECTS = $(SOURCES:.cpp=.o)
INCLUDES := -I../../common
EXECUTABLE=./shfl
@@ -44,7 +45,7 @@ EXECUTABLE=./shfl
all: $(EXECUTABLE) test
CXXFLAGS =-g
CXXFLAGS =-g $(INCLUDES)
CXX=$(HIPCC)
+8 -8
View File
@@ -24,7 +24,7 @@ THE SOFTWARE.
// hip header file
#include "hip/hip_runtime.h"
#include "hip_helper.h"
#define WIDTH 4
@@ -63,7 +63,7 @@ int main() {
float* gpuTransposeMatrix;
hipDeviceProp_t devProp;
hipGetDeviceProperties(&devProp, 0);
checkHipErrors(hipGetDeviceProperties(&devProp, 0));
std::cout << "Device name " << devProp.name << std::endl;
@@ -80,18 +80,18 @@ int main() {
}
// allocate the memory on the device side
hipMalloc((void**)&gpuMatrix, NUM * sizeof(float));
hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float));
checkHipErrors(hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)));
checkHipErrors(hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)));
// Memory transfer from host to device
hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice);
checkHipErrors(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice));
// Launching kernel from host
hipLaunchKernelGGL(matrixTranspose, dim3(1), dim3(THREADS_PER_BLOCK_X * THREADS_PER_BLOCK_Y), 0, 0,
gpuTransposeMatrix, gpuMatrix, WIDTH);
// Memory transfer from device to host
hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost);
checkHipErrors(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost));
// CPU MatrixTranspose computation
matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
@@ -112,8 +112,8 @@ int main() {
}
// free the resources on device side
hipFree(gpuMatrix);
hipFree(gpuTransposeMatrix);
checkHipErrors(hipFree(gpuMatrix));
checkHipErrors(hipFree(gpuTransposeMatrix));
// free the resources on host side
free(Matrix);
+8 -7
View File
@@ -24,6 +24,7 @@ THE SOFTWARE.
// hip header file
#include "hip/hip_runtime.h"
#include "hip_helper.h"
#define WIDTH 4
@@ -61,7 +62,7 @@ int main() {
float* gpuTransposeMatrix;
hipDeviceProp_t devProp;
hipGetDeviceProperties(&devProp, 0);
checkHipErrors(hipGetDeviceProperties(&devProp, 0));
std::cout << "Device name " << devProp.name << std::endl;
@@ -78,18 +79,18 @@ int main() {
}
// allocate the memory on the device side
hipMalloc((void**)&gpuMatrix, NUM * sizeof(float));
hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float));
checkHipErrors(hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)));
checkHipErrors(hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)));
// Memory transfer from host to device
hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice);
checkHipErrors(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice));
// Launching kernel from host
hipLaunchKernelGGL(matrixTranspose, dim3(1), dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0,
gpuTransposeMatrix, gpuMatrix, WIDTH);
// Memory transfer from device to host
hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost);
checkHipErrors(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost));
// CPU MatrixTranspose computation
matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
@@ -110,8 +111,8 @@ int main() {
}
// free the resources on device side
hipFree(gpuMatrix);
hipFree(gpuTransposeMatrix);
checkHipErrors(hipFree(gpuMatrix));
checkHipErrors(hipFree(gpuTransposeMatrix));
// free the resources on host side
free(Matrix);
@@ -39,5 +39,7 @@ set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE})
# Create the executable
add_executable(2dshfl 2dshfl.cpp)
# Add the samples' common directory to this target's include path only
target_include_directories(2dshfl PRIVATE ../../common)
# Link with HIP; PRIVATE avoids the legacy keyword-less signature
target_link_libraries(2dshfl PRIVATE hip::host)
+2 -1
View File
@@ -36,6 +36,7 @@ TARGET=hcc
SOURCES = 2dshfl.cpp
OBJECTS = $(SOURCES:.cpp=.o)
INCLUDES := -I../../common
EXECUTABLE=./2dshfl
@@ -44,7 +45,7 @@ EXECUTABLE=./2dshfl
all: $(EXECUTABLE) test
CXXFLAGS =-g
CXXFLAGS =-g $(INCLUDES)
CXX=$(HIPCC)
@@ -22,6 +22,8 @@ project(dynamic_shared)
# NOTE(review): the hunk context suggests project() is called before this
# line; cmake_minimum_required() should normally come first - confirm and reorder.
cmake_minimum_required(VERSION 3.10)
# The ../../common include path is attached to the dynamic_shared target via
# target_include_directories() further down, so no directory-scoped
# include_directories() call is needed here.
# Fall back to the default ROCm install location unless the caller set ROCM_PATH.
if (NOT DEFINED ROCM_PATH )
set ( ROCM_PATH "/opt/rocm" CACHE STRING "Default ROCM installation directory." )
endif ()
@@ -39,5 +41,7 @@ set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE})
# Create the executable
add_executable(dynamic_shared dynamic_shared.cpp)
# Add the samples' common directory to this target's include path only
target_include_directories(dynamic_shared PRIVATE ../../common)
# Link with HIP; PRIVATE avoids the legacy keyword-less signature
target_link_libraries(dynamic_shared PRIVATE hip::host)
+2 -1
View File
@@ -32,6 +32,7 @@ TARGET=hcc
SOURCES = dynamic_shared.cpp
OBJECTS = $(SOURCES:.cpp=.o)
INCLUDES := -I../../common
EXECUTABLE=./dynamic_shared
@@ -40,7 +41,7 @@ EXECUTABLE=./dynamic_shared
all: $(EXECUTABLE) test
CXXFLAGS =-g
CXXFLAGS =-g $(INCLUDES)
CXX=$(HIPCC)
@@ -24,6 +24,7 @@ THE SOFTWARE.
// hip header file
#include "hip/hip_runtime.h"
#include "hip_helper.h"
#define WIDTH 16
@@ -65,7 +66,7 @@ int main() {
float* gpuTransposeMatrix;
hipDeviceProp_t devProp;
hipGetDeviceProperties(&devProp, 0);
checkHipErrors(hipGetDeviceProperties(&devProp, 0));
std::cout << "Device name " << devProp.name << std::endl;
@@ -82,11 +83,11 @@ int main() {
}
// allocate the memory on the device side
hipMalloc((void**)&gpuMatrix, NUM * sizeof(float));
hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float));
checkHipErrors(hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)));
checkHipErrors(hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)));
// Memory transfer from host to device
hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice);
checkHipErrors(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice));
// Launching kernel from host
hipLaunchKernelGGL(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y),
@@ -94,7 +95,7 @@ int main() {
0, gpuTransposeMatrix, gpuMatrix, WIDTH);
// Memory transfer from device to host
hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost);
checkHipErrors(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost));
// CPU MatrixTranspose computation
matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
@@ -115,8 +116,8 @@ int main() {
}
// free the resources on device side
hipFree(gpuMatrix);
hipFree(gpuTransposeMatrix);
checkHipErrors(hipFree(gpuMatrix));
checkHipErrors(hipFree(gpuTransposeMatrix));
// free the resources on host side
free(Matrix);
@@ -39,5 +39,7 @@ set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE})
# Create the executable
add_executable(stream stream.cpp)
# Add the samples' common directory to this target's include path only
target_include_directories(stream PRIVATE ../../common)
# Link with HIP; PRIVATE avoids the legacy keyword-less signature
target_link_libraries(stream PRIVATE hip::host)
+2 -1
View File
@@ -32,6 +32,7 @@ TARGET=hcc
SOURCES = stream.cpp
OBJECTS = $(SOURCES:.cpp=.o)
INCLUDES := -I../../common
EXECUTABLE=./stream
@@ -40,7 +41,7 @@ EXECUTABLE=./stream
all: $(EXECUTABLE) test
CXXFLAGS =-g
CXXFLAGS =-g $(INCLUDES)
CXX=$(HIPCC)
+13 -12
View File
@@ -22,6 +22,7 @@ THE SOFTWARE.
#include <iostream>
#include <hip/hip_runtime.h>
#include "hip_helper.h"
#define WIDTH 32
@@ -66,11 +67,11 @@ void MultipleStream(float** data, float* randArray, float** gpuTransposeMatrix,
const int num_streams = 2;
hipStream_t streams[num_streams];
for (int i = 0; i < num_streams; i++) hipStreamCreate(&streams[i]);
for (int i = 0; i < num_streams; i++) checkHipErrors(hipStreamCreate(&streams[i]));
for (int i = 0; i < num_streams; i++) {
hipMalloc((void**)&data[i], NUM * sizeof(float));
hipMemcpyAsync(data[i], randArray, NUM * sizeof(float), hipMemcpyHostToDevice, streams[i]);
checkHipErrors(hipMalloc((void**)&data[i], NUM * sizeof(float)));
checkHipErrors(hipMemcpyAsync(data[i], randArray, NUM * sizeof(float), hipMemcpyHostToDevice, streams[i]));
}
hipLaunchKernelGGL(matrixTranspose_static_shared,
@@ -84,12 +85,12 @@ void MultipleStream(float** data, float* randArray, float** gpuTransposeMatrix,
streams[1], gpuTransposeMatrix[1], data[1], width);
for (int i = 0; i < num_streams; i++)
hipMemcpyAsync(TransposeMatrix[i], gpuTransposeMatrix[i], NUM * sizeof(float),
hipMemcpyDeviceToHost, streams[i]);
checkHipErrors(hipMemcpyAsync(TransposeMatrix[i], gpuTransposeMatrix[i], NUM * sizeof(float),
hipMemcpyDeviceToHost, streams[i]));
}
int main() {
hipSetDevice(0);
checkHipErrors(hipSetDevice(0));
float *data[2], *TransposeMatrix[2], *gpuTransposeMatrix[2], *randArray;
@@ -100,8 +101,8 @@ int main() {
TransposeMatrix[0] = (float*)malloc(NUM * sizeof(float));
TransposeMatrix[1] = (float*)malloc(NUM * sizeof(float));
hipMalloc((void**)&gpuTransposeMatrix[0], NUM * sizeof(float));
hipMalloc((void**)&gpuTransposeMatrix[1], NUM * sizeof(float));
checkHipErrors(hipMalloc((void**)&gpuTransposeMatrix[0], NUM * sizeof(float)));
checkHipErrors(hipMalloc((void**)&gpuTransposeMatrix[1], NUM * sizeof(float)));
for (int i = 0; i < NUM; i++) {
randArray[i] = (float)i * 1.0f;
@@ -109,7 +110,7 @@ int main() {
MultipleStream(data, randArray, gpuTransposeMatrix, TransposeMatrix, width);
hipDeviceSynchronize();
checkHipErrors(hipDeviceSynchronize());
// verify the results
int errors = 0;
@@ -128,11 +129,11 @@ int main() {
free(randArray);
for (int i = 0; i < 2; i++) {
hipFree(data[i]);
hipFree(gpuTransposeMatrix[i]);
checkHipErrors(hipFree(data[i]));
checkHipErrors(hipFree(gpuTransposeMatrix[i]));
free(TransposeMatrix[i]);
}
hipDeviceReset();
checkHipErrors(hipDeviceReset());
return 0;
}
@@ -39,5 +39,7 @@ set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE})
# Create the executable
add_executable(unroll unroll.cpp)
# Add the samples' common directory to this target's include path only
target_include_directories(unroll PRIVATE ../../common)
# Link with HIP; PRIVATE avoids the legacy keyword-less signature
target_link_libraries(unroll PRIVATE hip::host)
+2 -1
View File
@@ -36,6 +36,7 @@ TARGET=hcc
SOURCES = unroll.cpp
OBJECTS = $(SOURCES:.cpp=.o)
INCLUDES := -I../../common
EXECUTABLE=./unroll
@@ -44,7 +45,7 @@ EXECUTABLE=./unroll
all: $(EXECUTABLE) test
CXXFLAGS =-g
CXXFLAGS =-g $(INCLUDES)
CXX=$(HIPCC)
+9 -8
View File
@@ -24,6 +24,7 @@ THE SOFTWARE.
// hip header file
#include "hip/hip_runtime.h"
#include "hip_helper.h"
#define LENGTH 4
@@ -59,7 +60,7 @@ int main() {
int* gpuSumMatrix;
hipDeviceProp_t devProp;
hipGetDeviceProperties(&devProp, 0);
checkHipErrors(hipGetDeviceProperties(&devProp, 0));
std::cout << "Device name " << devProp.name << std::endl;
@@ -76,19 +77,19 @@ int main() {
}
// Allocated Device Memory
hipMalloc((void**)&gpuMatrix, SIZE * sizeof(int));
hipMalloc((void**)&gpuSumMatrix, LENGTH * sizeof(int));
checkHipErrors(hipMalloc((void**)&gpuMatrix, SIZE * sizeof(int)));
checkHipErrors(hipMalloc((void**)&gpuSumMatrix, LENGTH * sizeof(int)));
// Memory Copy to Device
hipMemcpy(gpuMatrix, Matrix, SIZE * sizeof(int), hipMemcpyHostToDevice);
hipMemcpy(gpuSumMatrix, cpuSumMatrix, LENGTH * sizeof(float), hipMemcpyHostToDevice);
checkHipErrors(hipMemcpy(gpuMatrix, Matrix, SIZE * sizeof(int), hipMemcpyHostToDevice));
checkHipErrors(hipMemcpy(gpuSumMatrix, cpuSumMatrix, LENGTH * sizeof(float), hipMemcpyHostToDevice));
// Launch device kernels
hipLaunchKernelGGL(gpuMatrixRowSum, dim3(BLOCKS_PER_GRID), dim3(THREADS_PER_BLOCK), 0, 0,
gpuMatrix, gpuSumMatrix, LENGTH);
// Memory copy back to host
hipMemcpy(sumMatrix, gpuSumMatrix, LENGTH * sizeof(int), hipMemcpyDeviceToHost);
checkHipErrors(hipMemcpy(sumMatrix, gpuSumMatrix, LENGTH * sizeof(int), hipMemcpyDeviceToHost));
// Cpu implementation
matrixRowSum(Matrix, cpuSumMatrix, LENGTH);
@@ -110,8 +111,8 @@ int main() {
}
// GPU Free
hipFree(gpuMatrix);
hipFree(gpuSumMatrix);
checkHipErrors(hipFree(gpuMatrix));
checkHipErrors(hipFree(gpuSumMatrix));
// CPU Free
free(Matrix);
+38
View File
@@ -0,0 +1,38 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <stdio.h>
#include <stdlib.h>
#include "hip/hip_runtime.h"
// The macro name doubles as the guard: if checkHipErrors is already defined
// (for example because this header was included twice), the block is skipped.
#ifndef checkHipErrors
// Evaluates a HIP API call and aborts the program if it did not succeed.
// The call site's file and line are captured so the report points at the
// failing call rather than at this helper.
#define checkHipErrors(err) __checkHipErrors((err), __FILE__, __LINE__)
/**
 * Reports a failed HIP call on stderr and terminates the process.
 *
 * @param err   Status returned by a HIP API call.
 * @param file  Source file of the call site (supplied by the macro).
 * @param line  Source line of the call site (supplied by the macro).
 *
 * Returns normally only when err is hipSuccess; otherwise prints the numeric
 * error code together with the hipGetErrorString() text and calls
 * exit(EXIT_FAILURE).
 */
inline void __checkHipErrors(hipError_t err, const char *file, const int line) {
  if (err == hipSuccess) {
    return;
  }
  // Cast explicitly so the enum matches the %d conversion in the varargs call.
  fprintf(stderr,
          "checkHipErrors() HIP API error = %04d \"%s\" from file <%s>, "
          "line %i.\n",
          static_cast<int>(err), hipGetErrorString(err), file, line);
  exit(EXIT_FAILURE);
}
#endif