Merge branch 'privatestaging' of https://github.com/AMDComputeLibraries/HIP-privatestaging into privatestaging
[ROCm/clr commit: c3b854fba4]
Tá an tiomantas seo le fáil i:
@@ -64,7 +64,7 @@ if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT AND CMAKE_INSTALL_PREFIX MATCHES
|
||||
if(CMAKE_BUILD_TYPE MATCHES Debug)
|
||||
set(CMAKE_INSTALL_PREFIX ${CMAKE_CURRENT_SOURCE_DIR} CACHE PATH "Installation path for HIP" FORCE)
|
||||
elseif(CMAKE_BUILD_TYPE MATCHES Release)
|
||||
set(CMAKE_INSTALL_PREFIX "/opt/hip" CACHE PATH "Installation path for HIP" FORCE)
|
||||
set(CMAKE_INSTALL_PREFIX "/opt/rocm/hip" CACHE PATH "Installation path for HIP" FORCE)
|
||||
else()
|
||||
message(FATAL_ERROR "Invalid CMAKE_BUILD_TYPE specified. Valid values are Debug and Release")
|
||||
endif()
|
||||
|
||||
@@ -34,30 +34,34 @@ Make sure HIP_PATH is pointed to `/where/to/install/hip` and PATH includes `$HIP
|
||||
## How do I get set up?
|
||||
|
||||
### Prerequisites - Choose Your Platform
|
||||
HIP code can be developed either on AMD HSA or Boltzmann platform using hcc compiler, or a CUDA platform with nvcc installed:
|
||||
HIP code can be developed either on AMD ROCm platform using hcc compiler, or a CUDA platform with nvcc installed:
|
||||
|
||||
#### AMD (hcc):
|
||||
|
||||
* Install [hcc](https://bitbucket.org/multicoreware/hcc/wiki/Home) including supporting HSA kernel and runtime driver stack
|
||||
* By default HIP looks for hcc in /opt/hcc (can be overridden by setting HCC_HOME environment variable)
|
||||
* By default HIP looks for HSA in /opt/hsa (can be overridden by setting HSA_PATH environment variable)
|
||||
* By default HIP looks for hcc in /opt/rocm/hcc (can be overridden by setting HCC_HOME environment variable)
|
||||
* By default HIP looks for HSA in /opt/rocm/hsa (can be overridden by setting HSA_PATH environment variable)
|
||||
* Ensure that ROCR runtime is installed and added to LD_LIBRARY_PATH
|
||||
* Install HIP (from this GitHub repot). By default HIP is installed into /opt/rocm/hip (can be overridden by setting HIP_PATH environment variable).
|
||||
|
||||
* Optionally, consider adding /opt/rocm/bin to your path to make it easier to use the tools.
|
||||
|
||||
#### NVIDIA (nvcc)
|
||||
* Install CUDA SDK from manufacturer website
|
||||
* By default HIP looks for CUDA SDK in /usr/local/cuda (can be overriden by setting CUDA_PATH env variable)
|
||||
|
||||
### Add HIP/bin to your path.
|
||||
For example, if this repot is cloned to ~/HIP, and you are running bash:
|
||||
```
|
||||
> export PATH=$PATH:~/HIP/bin
|
||||
|
||||
#### Verify your installation
|
||||
Run hipconfig (instructions below assume default installation path) :
|
||||
```
|
||||
Verify your can find hipconfig (one of the hip tools in bin dir):
|
||||
```
|
||||
> hipconfig -pn
|
||||
/home/me/HIP
|
||||
> /opt/rocm/bin/hipconfig --full
|
||||
```
|
||||
|
||||
Compile and run the [square sample](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/tree/master/samples/0_Intro/square).
|
||||
|
||||
|
||||
|
||||
### HCC Options
|
||||
|
||||
#### Compiling CodeXL markers for HIP Functions
|
||||
|
||||
@@ -196,7 +196,7 @@ if ($needHipHcc) {
|
||||
$HIP_USE_SHARED_LIBRARY = $ENV{'HIP_USE_SHARED_LIBRARY'};
|
||||
$HIP_USE_SHARED_LIBRARY = 0 unless defined $HIP_USE_SHARED_LIBRARY;
|
||||
|
||||
#$HIPLDFLAGS .= " -L/opt/hip/lib -lhip_hcc" ;
|
||||
#$HIPLDFLAGS .= " -L/opt/rocm/hip/lib -lhip_hcc" ;
|
||||
if ($HIP_USE_SHARED_LIBRARY) {
|
||||
$HIPLDFLAGS .= " -L$HIP_PATH/lib -Wl,--rpath=$HIP_PATH/lib -lhip_hcc";
|
||||
} else {
|
||||
|
||||
@@ -107,9 +107,15 @@ HIP is a portable C++ language that supports a strong subset of the CUDA run-tim
|
||||
|
||||
A C++ dialect, hc is supported by the AMD HCC compiler. It provides C++ run time, C++ kernel-launch APIs (parallel_for_each), C++ kernel language, and several memory-management options, including pointers, arrays and array_view (with implicit data synchronization). It's intended to be a leading indicator of the ISO C++ standard.
|
||||
|
||||
|
||||
### HIP detected my platform (hcc vs nvcc) incorrectly - what should I do?
|
||||
|
||||
HIP will set the platform to HCC if it sees that the AMD graphics driver is installed and has detected an AMD GPU.
|
||||
Sometimes this isn't what you want - you can force HIP to recognize the platform by setting HIP_PLATFORM to hcc (or nvcc)
|
||||
```
|
||||
export HIP_PLATFORM=hcc
|
||||
|
||||
```
|
||||
One symptom of this problem is the message "error: 'unknown error'(11) at square.hipref.cpp:56". This can occur if you have a CUDA installation on an AMD platform, and HIP incorrectly detects the platform as nvcc. HIP may be able to compile the application using the nvcc tool-chain, but will generate this error at runtime since the platform does not have a CUDA device. The fix is to set HIP_PLATFORM=hcc and rebuild the issue.
|
||||
|
||||
If you see issues related to incorrect platform detection, please file an issue with the GitHub issue tracker so we can improve HIP's platform detection logic.
|
||||
|
||||
Nasc siombalach
@@ -0,0 +1 @@
|
||||
../include
|
||||
@@ -1,6 +1,6 @@
|
||||
#Dependencies : [MYHIP]/bin must be in user's path.
|
||||
|
||||
HIP_PATH=?../../..
|
||||
HIP_PATH?=../../..
|
||||
HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --platform)
|
||||
HIPCC=$(HIP_PATH)/bin/hipcc
|
||||
|
||||
|
||||
@@ -24,11 +24,14 @@ THE SOFTWARE.
|
||||
#include <hip_runtime.h>
|
||||
|
||||
|
||||
#define CHECK(error) \
|
||||
#define CHECK(cmd) \
|
||||
{\
|
||||
hipError_t error = cmd;\
|
||||
if (error != hipSuccess) { \
|
||||
fprintf(stderr, "error: '%s'(%d) at %s:%d\n", hipGetErrorString(error), error,__FILE__, __LINE__); \
|
||||
exit(EXIT_FAILURE);\
|
||||
}
|
||||
}\
|
||||
}
|
||||
|
||||
void __global__
|
||||
bit_extract_kernel(hipLaunchParm lp, uint32_t *C_d, const uint32_t *A_d, size_t N)
|
||||
|
||||
@@ -0,0 +1,66 @@
|
||||
HCC_HOME?=/opt/rocm/hcc
|
||||
HCC = $(HCC_HOME)/bin/hcc
|
||||
|
||||
HCC_CFLAGS= `$(HCC_HOME)/bin/hcc-config --cxxflags`
|
||||
HCC_LDFLAGS= `$(HCC_HOME)/bin/hcc-config --ldflags`
|
||||
|
||||
CPPAMP_CFLAGS= -std=c++amp -stdlib=libc++ -I/opt/hcc/include
|
||||
CPPAMP_LDFLAGS= -std=c++amp -L/opt/hcc/lib -Wl,--rpath=/opt/hcc/lib -lc++ -lc++abi -ldl -lpthread -Wl,--whole-archive -lmcwamp -Wl,--no-whole-archive
|
||||
|
||||
HIP_PATH?=/opt/rocm/hip
|
||||
HIPCC=$(HIP_PATH)/bin/hipcc
|
||||
HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --platform)
|
||||
|
||||
ifneq (${HIP_PLATFORM}, hcc)
|
||||
$(error hcc_dialects requires hcc compiler and only runs on hcc platform)
|
||||
endif
|
||||
|
||||
|
||||
TARGETS=vadd_hc_arrayview vadd_hc_array vadd_amp_arrayview vadd_hip
|
||||
|
||||
all: $(TARGETS)
|
||||
|
||||
clean:
|
||||
rm -f $(TARGETS) *.o
|
||||
|
||||
run: $(TARGETS)
|
||||
@for t in $(TARGETS); do\
|
||||
echo "Running $$t"; \
|
||||
./$$t; \
|
||||
done
|
||||
|
||||
|
||||
# HCC version:
|
||||
vadd_hc_arrayview.o: vadd_hc_arrayview.cpp
|
||||
$(HCC) $(HCC_CFLAGS) -c $< -o $@
|
||||
vadd_hc_arrayview: vadd_hc_arrayview.o
|
||||
$(HCC) $(HCC_LDFLAGS) $< -o $@
|
||||
|
||||
|
||||
# HCC version, using explicit arrays:
|
||||
vadd_hc_array.o: vadd_hc_array.cpp
|
||||
$(HCC) $(HCC_CFLAGS) -c $< -o $@
|
||||
vadd_hc_array: vadd_hc_array.o
|
||||
$(HCC) $(HCC_LDFLAGS) $< -o $@
|
||||
|
||||
|
||||
# HCC version, using AM (accelerator memory) pointer
|
||||
vadd_hc_am.o: vadd_hc_am.cpp
|
||||
$(HCC) $(HCC_CFLAGS) -c $< -o $@
|
||||
vadd_hc_am: vadd_hc_am.o
|
||||
$(HCC) $(HCC_LDFLAGS) $< -o $@
|
||||
|
||||
|
||||
|
||||
# HIP version:
|
||||
vadd_hip.o: vadd_hip.cpp
|
||||
$(HIPCC) -c $< -o $@
|
||||
vadd_hip: vadd_hip.o
|
||||
$(HIPCC) $< -o $@
|
||||
|
||||
|
||||
# AMP version:
|
||||
vadd_amp_arrayview.o: vadd_amp_arrayview.cpp
|
||||
$(HCC) $(CPPAMP_CFLAGS) -c $< -o $@
|
||||
vadd_amp_arrayview: vadd_amp_arrayview.o
|
||||
$(HCC) $(CPPAMP_LDFLAGS) $< -o $@
|
||||
@@ -0,0 +1,48 @@
|
||||
// Simple test showing how to use C++AMP syntax with array_view.
|
||||
// The code uses AMP's array_view class, which provides automatic data synchronization
|
||||
// of data between the host and the accelerator. As noted below, the HCC runtime
|
||||
// will automatically copy data to and from the host, without the user needing
|
||||
// to manually perform such copies. This is an excellent mode for developers
|
||||
// new to GPU programming and matches the memory models provided by recent systems where
|
||||
// CPU and GPU share the same memory pool. Advanced programmers may prefer
|
||||
// more explicit control over the data movement - shown in the other vadd_hc_array and
|
||||
// vadd_hc_am examples.
|
||||
// This example shows the similarity between C++AMP and and HC for simple cases where
|
||||
// implicit data transfer is used - really the only difference is the namespace.
|
||||
// Other examples show some of the more advanced controls.
|
||||
|
||||
#include <amp.h>
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int sizeElements = 1000000;
|
||||
|
||||
// Allocate auto-managed host/device views of data:
|
||||
concurrency::array_view<float> A(sizeElements);
|
||||
concurrency::array_view<float> B(sizeElements);
|
||||
concurrency::array_view<float> C(sizeElements);
|
||||
|
||||
// Initialize host data
|
||||
for (int i=0; i<sizeElements; i++) {
|
||||
A[i] = 1.618f * i;
|
||||
B[i] = 3.142f * i;
|
||||
}
|
||||
C.discard_data(); // tell runtime not to copy CPU host data.
|
||||
|
||||
|
||||
// Launch kernel onto default accelerator
|
||||
// The HCC runtime will ensure that A and B are available on the accelerator before launching the kernel.
|
||||
concurrency::parallel_for_each(concurrency::extent<1> (sizeElements),
|
||||
[=] (concurrency::index<1> idx) restrict(amp) {
|
||||
int i = idx[0];
|
||||
C[i] = A[i] + B[i];
|
||||
});
|
||||
|
||||
for (int i=0; i<sizeElements; i++) {
|
||||
float ref= 1.618f * i + 3.142f * i;
|
||||
// Because C is an array_view, the HCC runtime will copy C back to host at first access here:
|
||||
if (C[i] != ref) {
|
||||
printf ("error:%d computed=%6.2f, reference=%6.2f\n", i, C[i], ref);
|
||||
}
|
||||
};
|
||||
}
|
||||
@@ -0,0 +1,59 @@
|
||||
// Simple test showing how to use HC syntax with AM (accelerator memory).
|
||||
// AM provides a set of c-style memory management routines for allocating,
|
||||
// freeing, and copying memory. am_alloc returns a device pointer
|
||||
// which can only be used on the device. The programmer has full control
|
||||
// over when data is copied.
|
||||
|
||||
#include <hc.hpp>
|
||||
#include <hc_am.hpp>
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int sizeElements = 1000000;
|
||||
size_t sizeBytes = sizeElements * sizeof(float);
|
||||
|
||||
// Allocate host memory
|
||||
float *A_h = (float*)malloc(sizeBytes);
|
||||
float *B_h = (float*)malloc(sizeBytes);
|
||||
float *C_h = (float*)malloc(sizeBytes);
|
||||
|
||||
// Allocate device pointers:
|
||||
// Unlike array_view, these must be explicitly managed by user:
|
||||
hc::accelerator acc; // grab default accelerator where we want to allocate memory:
|
||||
hc::accelerator_view av = acc.get_default_view();
|
||||
|
||||
float *A_d, *B_d, *C_d;
|
||||
A_d = hc::am_alloc(sizeBytes, acc, 0);
|
||||
B_d = hc::am_alloc(sizeBytes, acc, 0);
|
||||
C_d = hc::am_alloc(sizeBytes, acc, 0);
|
||||
|
||||
// Initialize host data
|
||||
for (int i=0; i<sizeElements; i++) {
|
||||
A_h[i] = 1.618f * i;
|
||||
B_h[i] = 3.142f * i;
|
||||
}
|
||||
|
||||
av.copy(A_h, A_d); // C++ copy H2D
|
||||
av.copy(B_h, B_d); //C++ copy H2D
|
||||
|
||||
// Launch kernel onto AV.
|
||||
// Because the kernel PFE and the copies are submitted to same AV, they will execute in order
|
||||
// and we don't need additional synchronization to ensure the copies complete before the PFE begins.
|
||||
hc::parallel_for_each(av, hc::extent<1> (sizeElements),
|
||||
[&] (hc::index<1> idx) [[hc]] {
|
||||
int i = idx[0];
|
||||
C_d[i] = A_d[i] + B_d[i];
|
||||
});
|
||||
|
||||
|
||||
// This copy is in same AV as the kernel and thus will wait for the kernel to finish before executing.
|
||||
av.copy(C_d, C_h); // C++ copy D2H
|
||||
|
||||
|
||||
for (int i=0; i<sizeElements; i++) {
|
||||
float ref= 1.618f * i + 3.142f * i;
|
||||
if (C_h[i] != ref) {
|
||||
printf ("error:%d computed=%6.2f, reference=%6.2f\n", i, C_h[i], ref);
|
||||
}
|
||||
};
|
||||
}
|
||||
@@ -0,0 +1,53 @@
|
||||
// Simple test showing how to use HC syntax with array.
|
||||
// Array provides a type-safe C++ mechanism to allocate accelerator memory.
|
||||
// Like array_view, hc::array provides multi-dimensional indexing capability,
|
||||
// and is typed. However, unlike array_view, hc::array does not provide
|
||||
// automatic data management capabilities - instead the programmer
|
||||
// takes the reins and controls when copies are executed.
|
||||
|
||||
#include <hc.hpp>
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int sizeElements = 1000000;
|
||||
size_t sizeBytes = sizeElements * sizeof(float);
|
||||
|
||||
// Allocate host memory
|
||||
float *A_h = (float*)malloc(sizeBytes);
|
||||
float *B_h = (float*)malloc(sizeBytes);
|
||||
float *C_h = (float*)malloc(sizeBytes);
|
||||
|
||||
// Allocate device arrays<>
|
||||
// Unlike array_view, these must be explicitly managed by user:
|
||||
hc::array<float> A_d(sizeElements);
|
||||
hc::array<float> B_d(sizeElements);
|
||||
hc::array<float> C_d(sizeElements);
|
||||
|
||||
// Initialize host data
|
||||
for (int i=0; i<sizeElements; i++) {
|
||||
A_h[i] = 1.618f * i;
|
||||
B_h[i] = 3.142f * i;
|
||||
}
|
||||
|
||||
hc::copy(A_h, A_d); // C++ copy H2D
|
||||
hc::copy(B_h, B_d); // C++ copy H2D
|
||||
|
||||
// Launch kernel onto default accelerator:
|
||||
// array<> types are not implicitly copied, so we performed copies above.
|
||||
hc::parallel_for_each(hc::extent<1> (sizeElements),
|
||||
[&] (hc::index<1> idx) [[hc]] {
|
||||
int i = idx[0];
|
||||
C_d[i] = A_d[i] + B_d[i];
|
||||
});
|
||||
|
||||
// HCC runtime knows that C_d depends on previous PFE and will force the copy to wait for the PFE to complte.
|
||||
hc::copy(C_d, C_h); // C++ copy D2H
|
||||
|
||||
|
||||
for (int i=0; i<sizeElements; i++) {
|
||||
float ref= 1.618f * i + 3.142f * i;
|
||||
if (C_h[i] != ref) {
|
||||
printf ("error:%d computed=%6.2f, reference=%6.2f\n", i, C_h[i], ref);
|
||||
}
|
||||
};
|
||||
}
|
||||
@@ -0,0 +1,33 @@
|
||||
#include <hc.hpp>
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int size = 1000000;
|
||||
|
||||
// Allocate auto-managed host/device views of data:
|
||||
hc::array_view<float> A(size);
|
||||
hc::array_view<float> B(size);
|
||||
hc::array_view<float> C(size);
|
||||
|
||||
// Initialize host data
|
||||
for (int i=0; i<size; i++) {
|
||||
A[i] = 1.618f * i;
|
||||
B[i] = 3.142f * i;
|
||||
}
|
||||
C.discard_data(); // tell runtime not to copy CPU host data.
|
||||
|
||||
|
||||
// Launch kernel onto default accelerator:
|
||||
hc::parallel_for_each(hc::extent<1> (size),
|
||||
[=] (hc::index<1> idx) [[hc]] {
|
||||
int i = idx[0];
|
||||
C[i] = A[i] + B[i];
|
||||
});
|
||||
|
||||
for (int i=0; i<size; i++) {
|
||||
float ref= 1.618f * i + 3.142f * i;
|
||||
if (C[i] != ref) {
|
||||
printf ("error:%d computed=%6.2f, reference=%6.2f\n", i, C[i], ref);
|
||||
}
|
||||
};
|
||||
}
|
||||
@@ -0,0 +1,48 @@
|
||||
// Simple test showing how to use HC syntax with array_view.
|
||||
// The code uses AMP's array_view class, which provides automatic data synchronization
|
||||
// of data between the host and the accelerator. As noted below, the HCC runtime
|
||||
// will automatically copy data to and from the host, without the user needing
|
||||
// to manually perform such copies. This is an excellent mode for developers
|
||||
// new to GPU programming and matches the memory models provided by recent systems where
|
||||
// CPU and GPU share the same memory pool. Advanced programmers may prefer
|
||||
// more explicit control over the data movement - shown in the other vadd_hc_array and
|
||||
// vadd_hc_am examples.
|
||||
// This example shows the similarity between C++AMP and and HC for simple cases where
|
||||
// implicit data transfer is used - really the only difference is the namespace.
|
||||
// Other examples show some of the more advanced controls.
|
||||
|
||||
#include <hc.hpp>
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int sizeElements = 1000000;
|
||||
|
||||
// Allocate auto-managed host/device views of data:
|
||||
hc::array_view<float> A(sizeElements);
|
||||
hc::array_view<float> B(sizeElements);
|
||||
hc::array_view<float> C(sizeElements);
|
||||
|
||||
// Initialize host data
|
||||
for (int i=0; i<sizeElements; i++) {
|
||||
A[i] = 1.618f * i;
|
||||
B[i] = 3.142f * i;
|
||||
}
|
||||
C.discard_data(); // tell runtime not to copy CPU host data.
|
||||
|
||||
|
||||
// Launch kernel onto default accelerator:
|
||||
// The HCC runtime will ensure that A and B are available on the accelerator before launching the kernel.
|
||||
hc::parallel_for_each(hc::extent<1> (sizeElements),
|
||||
[=] (hc::index<1> idx) [[hc]] {
|
||||
int i = idx[0];
|
||||
C[i] = A[i] + B[i];
|
||||
});
|
||||
|
||||
for (int i=0; i<sizeElements; i++) {
|
||||
float ref= 1.618f * i + 3.142f * i;
|
||||
// Because C is an array_view, the HCC runtime will copy C back to host at first access here:
|
||||
if (C[i] != ref) {
|
||||
printf ("error:%d computed=%6.2f, reference=%6.2f\n", i, C[i], ref);
|
||||
}
|
||||
};
|
||||
}
|
||||
@@ -0,0 +1,51 @@
|
||||
#include <hip_runtime.h>
|
||||
|
||||
__global__ void vadd_hip(hipLaunchParm lp, const float *a, const float *b, float *c, int N)
|
||||
{
|
||||
int idx = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
|
||||
|
||||
if (idx < N) {
|
||||
c[idx] = a[idx] + b[idx];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int sizeElements = 1000000;
|
||||
size_t sizeBytes = sizeElements * sizeof(float);
|
||||
|
||||
// Allocate host memory
|
||||
float *A_h = (float*)malloc(sizeBytes);
|
||||
float *B_h = (float*)malloc(sizeBytes);
|
||||
float *C_h = (float*)malloc(sizeBytes);
|
||||
|
||||
// Allocate device memory:
|
||||
float *A_d, *B_d, *C_d;
|
||||
hipMalloc(&A_d, sizeBytes);
|
||||
hipMalloc(&B_d, sizeBytes);
|
||||
hipMalloc(&C_d, sizeBytes);
|
||||
|
||||
// Initialize host data
|
||||
for (int i=0; i<sizeElements; i++) {
|
||||
A_h[i] = 1.618f * i;
|
||||
B_h[i] = 3.142f * i;
|
||||
}
|
||||
|
||||
hipMemcpy(A_d, A_h, sizeBytes, hipMemcpyHostToDevice);
|
||||
hipMemcpy(B_d, B_h, sizeBytes, hipMemcpyHostToDevice);
|
||||
|
||||
// Launch kernel onto default accelerator:
|
||||
int blockSize = 256; // pick arbitrary block size
|
||||
int blocks = (sizeElements+blockSize-1)/blockSize; // round up to launch enough blocks
|
||||
hipLaunchKernel(vadd_hip, dim3(blocks), dim3(blockSize), 0, 0, A_d, B_d, C_d, sizeElements);
|
||||
|
||||
hipMemcpy(C_h, C_d, sizeBytes, hipMemcpyDeviceToHost);
|
||||
|
||||
for (int i=0; i<sizeElements; i++) {
|
||||
float ref= 1.618f * i + 3.142f * i;
|
||||
if (C_h[i] != ref) {
|
||||
printf ("error:%d computed=%6.2f, reference=%6.2f\n", i, C_h[i], ref);
|
||||
}
|
||||
};
|
||||
}
|
||||
@@ -22,11 +22,14 @@ THE SOFTWARE.
|
||||
#include <stdio.h>
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
#define CHECK(error) \
|
||||
#define CHECK(cmd) \
|
||||
{\
|
||||
hipError_t error = cmd;\
|
||||
if (error != cudaSuccess) { \
|
||||
fprintf(stderr, "error: '%s'(%d) at %s:%d\n", cudaGetErrorString(error), error,__FILE__, __LINE__); \
|
||||
exit(EXIT_FAILURE);\
|
||||
}
|
||||
}\
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
|
||||
@@ -22,11 +22,14 @@ THE SOFTWARE.
|
||||
#include <stdio.h>
|
||||
#include <hip_runtime.h>
|
||||
|
||||
#define CHECK(error) \
|
||||
#define CHECK(cmd) \
|
||||
{\
|
||||
hipError_t error = cmd;\
|
||||
if (error != hipSuccess) { \
|
||||
fprintf(stderr, "error: '%s'(%d) at %s:%d\n", hipGetErrorString(error), error,__FILE__, __LINE__); \
|
||||
exit(EXIT_FAILURE);\
|
||||
}
|
||||
}\
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
|
||||
Tagairt in Eagrán Nua
Cuir bac ar úsáideoir