diff --git a/projects/clr/hipamd/CMakeLists.txt b/projects/clr/hipamd/CMakeLists.txt index 124295fbf7..e7eee21b01 100644 --- a/projects/clr/hipamd/CMakeLists.txt +++ b/projects/clr/hipamd/CMakeLists.txt @@ -64,7 +64,7 @@ if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT AND CMAKE_INSTALL_PREFIX MATCHES if(CMAKE_BUILD_TYPE MATCHES Debug) set(CMAKE_INSTALL_PREFIX ${CMAKE_CURRENT_SOURCE_DIR} CACHE PATH "Installation path for HIP" FORCE) elseif(CMAKE_BUILD_TYPE MATCHES Release) - set(CMAKE_INSTALL_PREFIX "/opt/hip" CACHE PATH "Installation path for HIP" FORCE) + set(CMAKE_INSTALL_PREFIX "/opt/rocm/hip" CACHE PATH "Installation path for HIP" FORCE) else() message(FATAL_ERROR "Invalid CMAKE_BUILD_TYPE specified. Valid values are Debug and Release") endif() diff --git a/projects/clr/hipamd/README.md b/projects/clr/hipamd/README.md index 810c378436..39c1092c63 100644 --- a/projects/clr/hipamd/README.md +++ b/projects/clr/hipamd/README.md @@ -34,30 +34,34 @@ Make sure HIP_PATH is pointed to `/where/to/install/hip` and PATH includes `$HIP ## How do I get set up? ### Prerequisites - Choose Your Platform -HIP code can be developed either on AMD HSA or Boltzmann platform using hcc compiler, or a CUDA platform with nvcc installed: +HIP code can be developed either on AMD ROCm platform using hcc compiler, or a CUDA platform with nvcc installed: #### AMD (hcc): * Install [hcc](https://bitbucket.org/multicoreware/hcc/wiki/Home) including supporting HSA kernel and runtime driver stack -* By default HIP looks for hcc in /opt/hcc (can be overridden by setting HCC_HOME environment variable) -* By default HIP looks for HSA in /opt/hsa (can be overridden by setting HSA_PATH environment variable) +* By default HIP looks for hcc in /opt/rocm/hcc (can be overridden by setting HCC_HOME environment variable) +* By default HIP looks for HSA in /opt/rocm/hsa (can be overridden by setting HSA_PATH environment variable) * Ensure that ROCR runtime is installed and added to LD_LIBRARY_PATH +* Install HIP (from this GitHub repot). By default HIP is installed into /opt/rocm/hip (can be overridden by setting HIP_PATH environment variable). + +* Optionally, consider adding /opt/rocm/bin to your path to make it easier to use the tools. #### NVIDIA (nvcc) * Install CUDA SDK from manufacturer website * By default HIP looks for CUDA SDK in /usr/local/cuda (can be overriden by setting CUDA_PATH env variable) -### Add HIP/bin to your path. -For example, if this repot is cloned to ~/HIP, and you are running bash: ``` -> export PATH=$PATH:~/HIP/bin + +#### Verify your installation +Run hipconfig (instructions below assume default installation path) : ``` -Verify your can find hipconfig (one of the hip tools in bin dir): -``` -> hipconfig -pn -/home/me/HIP +> /opt/rocm/bin/hipconfig --full ``` +Compile and run the [square sample](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/tree/master/samples/0_Intro/square). + + + ### HCC Options #### Compiling CodeXL markers for HIP Functions diff --git a/projects/clr/hipamd/bin/hipcc b/projects/clr/hipamd/bin/hipcc index ecf03e24ee..33bc1d9eca 100755 --- a/projects/clr/hipamd/bin/hipcc +++ b/projects/clr/hipamd/bin/hipcc @@ -196,7 +196,7 @@ if ($needHipHcc) { $HIP_USE_SHARED_LIBRARY = $ENV{'HIP_USE_SHARED_LIBRARY'}; $HIP_USE_SHARED_LIBRARY = 0 unless defined $HIP_USE_SHARED_LIBRARY; - #$HIPLDFLAGS .= " -L/opt/hip/lib -lhip_hcc" ; + #$HIPLDFLAGS .= " -L/opt/rocm/hip/lib -lhip_hcc" ; if ($HIP_USE_SHARED_LIBRARY) { $HIPLDFLAGS .= " -L$HIP_PATH/lib -Wl,--rpath=$HIP_PATH/lib -lhip_hcc"; } else { diff --git a/projects/clr/hipamd/docs/markdown/hip_faq.md b/projects/clr/hipamd/docs/markdown/hip_faq.md index 1a62784100..f1d8c607ea 100644 --- a/projects/clr/hipamd/docs/markdown/hip_faq.md +++ b/projects/clr/hipamd/docs/markdown/hip_faq.md @@ -107,9 +107,15 @@ HIP is a portable C++ language that supports a strong subset of the CUDA run-tim A C++ dialect, hc is supported by the AMD HCC compiler. It provides C++ run time, C++ kernel-launch APIs (parallel_for_each), C++ kernel language, and several memory-management options, including pointers, arrays and array_view (with implicit data synchronization). It's intended to be a leading indicator of the ISO C++ standard. + ### HIP detected my platform (hcc vs nvcc) incorrectly - what should I do? + HIP will set the platform to HCC if it sees that the AMD graphics driver is installed and has detected an AMD GPU. Sometimes this isn't what you want - you can force HIP to recognize the platform by setting HIP_PLATFORM to hcc (or nvcc) ``` export HIP_PLATFORM=hcc + ``` +One symptom of this problem is the message "error: 'unknown error'(11) at square.hipref.cpp:56". This can occur if you have a CUDA installation on an AMD platform, and HIP incorrectly detects the platform as nvcc. HIP may be able to compile the application using the nvcc tool-chain, but will generate this error at runtime since the platform does not have a CUDA device. The fix is to set HIP_PLATFORM=hcc and rebuild the issue. + +If you see issues related to incorrect platform detection, please file an issue with the GitHub issue tracker so we can improve HIP's platform detection logic. diff --git a/projects/clr/hipamd/include/hip b/projects/clr/hipamd/include/hip new file mode 120000 index 0000000000..f5030fe889 --- /dev/null +++ b/projects/clr/hipamd/include/hip @@ -0,0 +1 @@ +../include \ No newline at end of file diff --git a/projects/clr/hipamd/samples/0_Intro/bit_extract/Makefile b/projects/clr/hipamd/samples/0_Intro/bit_extract/Makefile index cdf793363b..b71828f5fa 100644 --- a/projects/clr/hipamd/samples/0_Intro/bit_extract/Makefile +++ b/projects/clr/hipamd/samples/0_Intro/bit_extract/Makefile @@ -1,6 +1,6 @@ #Dependencies : [MYHIP]/bin must be in user's path. -HIP_PATH=?../../.. +HIP_PATH?=../../.. HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --platform) HIPCC=$(HIP_PATH)/bin/hipcc diff --git a/projects/clr/hipamd/samples/0_Intro/bit_extract/bit_extract.cpp b/projects/clr/hipamd/samples/0_Intro/bit_extract/bit_extract.cpp index 5545a99c0f..bdc8182c38 100644 --- a/projects/clr/hipamd/samples/0_Intro/bit_extract/bit_extract.cpp +++ b/projects/clr/hipamd/samples/0_Intro/bit_extract/bit_extract.cpp @@ -24,11 +24,14 @@ THE SOFTWARE. #include -#define CHECK(error) \ +#define CHECK(cmd) \ +{\ + hipError_t error = cmd;\ if (error != hipSuccess) { \ fprintf(stderr, "error: '%s'(%d) at %s:%d\n", hipGetErrorString(error), error,__FILE__, __LINE__); \ exit(EXIT_FAILURE);\ - } + }\ +} void __global__ bit_extract_kernel(hipLaunchParm lp, uint32_t *C_d, const uint32_t *A_d, size_t N) diff --git a/projects/clr/hipamd/samples/0_Intro/hcc_dialects/Makefile b/projects/clr/hipamd/samples/0_Intro/hcc_dialects/Makefile new file mode 100644 index 0000000000..108d30201c --- /dev/null +++ b/projects/clr/hipamd/samples/0_Intro/hcc_dialects/Makefile @@ -0,0 +1,66 @@ +HCC_HOME?=/opt/rocm/hcc +HCC = $(HCC_HOME)/bin/hcc + +HCC_CFLAGS= `$(HCC_HOME)/bin/hcc-config --cxxflags` +HCC_LDFLAGS= `$(HCC_HOME)/bin/hcc-config --ldflags` + +CPPAMP_CFLAGS= -std=c++amp -stdlib=libc++ -I/opt/hcc/include +CPPAMP_LDFLAGS= -std=c++amp -L/opt/hcc/lib -Wl,--rpath=/opt/hcc/lib -lc++ -lc++abi -ldl -lpthread -Wl,--whole-archive -lmcwamp -Wl,--no-whole-archive + +HIP_PATH?=/opt/rocm/hip +HIPCC=$(HIP_PATH)/bin/hipcc +HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --platform) + +ifneq (${HIP_PLATFORM}, hcc) +$(error hcc_dialects requires hcc compiler and only runs on hcc platform) +endif + + +TARGETS=vadd_hc_arrayview vadd_hc_array vadd_amp_arrayview vadd_hip + +all: $(TARGETS) + +clean: + rm -f $(TARGETS) *.o + +run: $(TARGETS) + @for t in $(TARGETS); do\ + echo "Running $$t"; \ + ./$$t; \ + done + + +# HCC version: +vadd_hc_arrayview.o: vadd_hc_arrayview.cpp + $(HCC) $(HCC_CFLAGS) -c $< -o $@ +vadd_hc_arrayview: vadd_hc_arrayview.o + $(HCC) $(HCC_LDFLAGS) $< -o $@ + + +# HCC version, using explicit arrays: +vadd_hc_array.o: vadd_hc_array.cpp + $(HCC) $(HCC_CFLAGS) -c $< -o $@ +vadd_hc_array: vadd_hc_array.o + $(HCC) $(HCC_LDFLAGS) $< -o $@ + + +# HCC version, using AM (accelerator memory) pointer +vadd_hc_am.o: vadd_hc_am.cpp + $(HCC) $(HCC_CFLAGS) -c $< -o $@ +vadd_hc_am: vadd_hc_am.o + $(HCC) $(HCC_LDFLAGS) $< -o $@ + + + +# HIP version: +vadd_hip.o: vadd_hip.cpp + $(HIPCC) -c $< -o $@ +vadd_hip: vadd_hip.o + $(HIPCC) $< -o $@ + + +# AMP version: +vadd_amp_arrayview.o: vadd_amp_arrayview.cpp + $(HCC) $(CPPAMP_CFLAGS) -c $< -o $@ +vadd_amp_arrayview: vadd_amp_arrayview.o + $(HCC) $(CPPAMP_LDFLAGS) $< -o $@ diff --git a/projects/clr/hipamd/samples/0_Intro/hcc_dialects/vadd_amp_arrayview.cpp b/projects/clr/hipamd/samples/0_Intro/hcc_dialects/vadd_amp_arrayview.cpp new file mode 100644 index 0000000000..6fdea5d830 --- /dev/null +++ b/projects/clr/hipamd/samples/0_Intro/hcc_dialects/vadd_amp_arrayview.cpp @@ -0,0 +1,48 @@ +// Simple test showing how to use C++AMP syntax with array_view. +// The code uses AMP's array_view class, which provides automatic data synchronization +// of data between the host and the accelerator. As noted below, the HCC runtime +// will automatically copy data to and from the host, without the user needing +// to manually perform such copies. This is an excellent mode for developers +// new to GPU programming and matches the memory models provided by recent systems where +// CPU and GPU share the same memory pool. Advanced programmers may prefer +// more explicit control over the data movement - shown in the other vadd_hc_array and +// vadd_hc_am examples. +// This example shows the similarity between C++AMP and and HC for simple cases where +// implicit data transfer is used - really the only difference is the namespace. +// Other examples show some of the more advanced controls. + +#include + +int main(int argc, char *argv[]) +{ + int sizeElements = 1000000; + + // Allocate auto-managed host/device views of data: + concurrency::array_view A(sizeElements); + concurrency::array_view B(sizeElements); + concurrency::array_view C(sizeElements); + + // Initialize host data + for (int i=0; i (sizeElements), + [=] (concurrency::index<1> idx) restrict(amp) { + int i = idx[0]; + C[i] = A[i] + B[i]; + }); + + for (int i=0; i +#include + +int main(int argc, char *argv[]) +{ + int sizeElements = 1000000; + size_t sizeBytes = sizeElements * sizeof(float); + + // Allocate host memory + float *A_h = (float*)malloc(sizeBytes); + float *B_h = (float*)malloc(sizeBytes); + float *C_h = (float*)malloc(sizeBytes); + + // Allocate device pointers: + // Unlike array_view, these must be explicitly managed by user: + hc::accelerator acc; // grab default accelerator where we want to allocate memory: + hc::accelerator_view av = acc.get_default_view(); + + float *A_d, *B_d, *C_d; + A_d = hc::am_alloc(sizeBytes, acc, 0); + B_d = hc::am_alloc(sizeBytes, acc, 0); + C_d = hc::am_alloc(sizeBytes, acc, 0); + + // Initialize host data + for (int i=0; i (sizeElements), + [&] (hc::index<1> idx) [[hc]] { + int i = idx[0]; + C_d[i] = A_d[i] + B_d[i]; + }); + + + // This copy is in same AV as the kernel and thus will wait for the kernel to finish before executing. + av.copy(C_d, C_h); // C++ copy D2H + + + for (int i=0; i + +int main(int argc, char *argv[]) +{ + int sizeElements = 1000000; + size_t sizeBytes = sizeElements * sizeof(float); + + // Allocate host memory + float *A_h = (float*)malloc(sizeBytes); + float *B_h = (float*)malloc(sizeBytes); + float *C_h = (float*)malloc(sizeBytes); + + // Allocate device arrays<> + // Unlike array_view, these must be explicitly managed by user: + hc::array A_d(sizeElements); + hc::array B_d(sizeElements); + hc::array C_d(sizeElements); + + // Initialize host data + for (int i=0; i types are not implicitly copied, so we performed copies above. + hc::parallel_for_each(hc::extent<1> (sizeElements), + [&] (hc::index<1> idx) [[hc]] { + int i = idx[0]; + C_d[i] = A_d[i] + B_d[i]; + }); + + // HCC runtime knows that C_d depends on previous PFE and will force the copy to wait for the PFE to complte. + hc::copy(C_d, C_h); // C++ copy D2H + + + for (int i=0; i + +int main(int argc, char *argv[]) +{ + int size = 1000000; + + // Allocate auto-managed host/device views of data: + hc::array_view A(size); + hc::array_view B(size); + hc::array_view C(size); + + // Initialize host data + for (int i=0; i (size), + [=] (hc::index<1> idx) [[hc]] { + int i = idx[0]; + C[i] = A[i] + B[i]; + }); + + for (int i=0; i + +int main(int argc, char *argv[]) +{ + int sizeElements = 1000000; + + // Allocate auto-managed host/device views of data: + hc::array_view A(sizeElements); + hc::array_view B(sizeElements); + hc::array_view C(sizeElements); + + // Initialize host data + for (int i=0; i (sizeElements), + [=] (hc::index<1> idx) [[hc]] { + int i = idx[0]; + C[i] = A[i] + B[i]; + }); + + for (int i=0; i + +__global__ void vadd_hip(hipLaunchParm lp, const float *a, const float *b, float *c, int N) +{ + int idx = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x); + + if (idx < N) { + c[idx] = a[idx] + b[idx]; + } +} + + +int main(int argc, char *argv[]) +{ + int sizeElements = 1000000; + size_t sizeBytes = sizeElements * sizeof(float); + + // Allocate host memory + float *A_h = (float*)malloc(sizeBytes); + float *B_h = (float*)malloc(sizeBytes); + float *C_h = (float*)malloc(sizeBytes); + + // Allocate device memory: + float *A_d, *B_d, *C_d; + hipMalloc(&A_d, sizeBytes); + hipMalloc(&B_d, sizeBytes); + hipMalloc(&C_d, sizeBytes); + + // Initialize host data + for (int i=0; i #include -#define CHECK(error) \ +#define CHECK(cmd) \ +{\ + hipError_t error = cmd;\ if (error != cudaSuccess) { \ fprintf(stderr, "error: '%s'(%d) at %s:%d\n", cudaGetErrorString(error), error,__FILE__, __LINE__); \ exit(EXIT_FAILURE);\ - } + }\ +} /* diff --git a/projects/clr/hipamd/samples/0_Intro/square/square.hipref.cpp b/projects/clr/hipamd/samples/0_Intro/square/square.hipref.cpp index 5d53a8d584..aa14077738 100644 --- a/projects/clr/hipamd/samples/0_Intro/square/square.hipref.cpp +++ b/projects/clr/hipamd/samples/0_Intro/square/square.hipref.cpp @@ -22,11 +22,14 @@ THE SOFTWARE. #include #include -#define CHECK(error) \ +#define CHECK(cmd) \ +{\ + hipError_t error = cmd;\ if (error != hipSuccess) { \ fprintf(stderr, "error: '%s'(%d) at %s:%d\n", hipGetErrorString(error), error,__FILE__, __LINE__); \ exit(EXIT_FAILURE);\ - } + }\ +} /*