From 106030f2c60821cb0f0536795b4d79940c34e8fe Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Tue, 7 Mar 2017 11:24:32 -0600 Subject: [PATCH 01/23] added new field to hipDeviceProp_t structure gcnArch. 1. It is an integer containing gfx values 701, 801, 802, 803 2. On NV path, it is zero Change-Id: I2b4c7f48981d0214d8c6b1905d2cc85b16203419 [ROCm/clr commit: 9f575721aa940f65a8b5f542b6fc75c14cadf956] --- .../clr/hipamd/include/hip/hip_runtime_api.h | 1 + .../samples/0_Intro/square/square.hipref.cpp | 10 +++++---- projects/clr/hipamd/src/hip_hcc.cpp | 21 +++++++++++++++---- 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/projects/clr/hipamd/include/hip/hip_runtime_api.h b/projects/clr/hipamd/include/hip/hip_runtime_api.h index 28d67fc01a..818c0b7c34 100644 --- a/projects/clr/hipamd/include/hip/hip_runtime_api.h +++ b/projects/clr/hipamd/include/hip/hip_runtime_api.h @@ -106,6 +106,7 @@ typedef struct hipDeviceProp_t { size_t maxSharedMemoryPerMultiProcessor; ///< Maximum Shared Memory Per Multiprocessor. int isMultiGpuBoard; ///< 1 if device is on a multi-GPU board, 0 if not. int canMapHostMemory; ///< Check whether HIP can map host memory + int gcnArch; ///< AMD GCN Arch Value. Eg: 803, 701 } hipDeviceProp_t; diff --git a/projects/clr/hipamd/samples/0_Intro/square/square.hipref.cpp b/projects/clr/hipamd/samples/0_Intro/square/square.hipref.cpp index 0073c1399a..e694bfb8a4 100644 --- a/projects/clr/hipamd/samples/0_Intro/square/square.hipref.cpp +++ b/projects/clr/hipamd/samples/0_Intro/square/square.hipref.cpp @@ -32,7 +32,7 @@ THE SOFTWARE. }\ } -/* +/* * Square each element in the array A and write to array C. */ template @@ -58,16 +58,18 @@ int main(int argc, char *argv[]) hipDeviceProp_t props; CHECK(hipGetDeviceProperties(&props, 0/*deviceID*/)); printf ("info: running on device %s\n", props.name); - + #ifdef __HIP_PLATFORM_HCC__ + printf ("info: architecture on AMD GPU device is: %d\n",props.gcnArch); + #endif printf ("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0); A_h = (float*)malloc(Nbytes); CHECK(A_h == 0 ? hipErrorMemoryAllocation : hipSuccess ); C_h = (float*)malloc(Nbytes); CHECK(C_h == 0 ? hipErrorMemoryAllocation : hipSuccess ); // Fill with Phi + i - for (size_t i=0; iisMultiGpuBoard = 0 ? gpuAgentsCount < 2 : 1; // Get agent name -#if HIP_USE_PRODUCT_NAME + err = hsa_agent_get_info(_hsaAgent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_PRODUCT_NAME, &(prop->name)); -#else - err = hsa_agent_get_info(_hsaAgent, HSA_AGENT_INFO_NAME, &(prop->name)); -#endif + char archName[256]; + err = hsa_agent_get_info(_hsaAgent, HSA_AGENT_INFO_NAME, &archName); + + if(strcmp(archName,"gfx701")==0){ + prop->gcnArch = 701; + } + if(strcmp(archName,"gfx801")==0){ + prop->gcnArch = 801; + } + if(strcmp(archName,"gfx802")==0){ + prop->gcnArch = 802; + } + if(strcmp(archName,"gfx803")==0){ + prop->gcnArch = 803; + } + DeviceErrorCheck(err); // Get agent node From c861d10d1e897018a3bac8e672e088b54aa2d8bd Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Tue, 7 Mar 2017 13:46:29 -0600 Subject: [PATCH 02/23] Added new API, hipMemPtrGetInfo 1. This API returns memory allocation size of pointer 2. Added test to check its functionality Change-Id: I87976d817b5a6ca5530336c09e7cb0420601cb2c [ROCm/clr commit: 7b7d53f875c2ff1960397c222938095fd242cb8f] --- .../include/hip/hcc_detail/hip_runtime_api.h | 3 ++ projects/clr/hipamd/src/hip_memory.cpp | 21 ++++++++ .../src/runtimeApi/memory/hipHostGetFlags.cpp | 2 +- .../runtimeApi/memory/hipMemPtrGetInfo.cpp | 52 +++++++++++++++++++ 4 files changed, 77 insertions(+), 1 deletion(-) create mode 100644 projects/clr/hipamd/tests/src/runtimeApi/memory/hipMemPtrGetInfo.cpp diff --git a/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime_api.h b/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime_api.h index f156d3fdbd..fb8535cfec 100644 --- a/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime_api.h +++ b/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime_api.h @@ -1236,6 +1236,9 @@ hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t st hipError_t hipMemGetInfo (size_t * free, size_t * total) ; +hipError_t hipMemPtrGetInfo(void *ptr, size_t *size); + + /** * @brief Allocate an array on the device. * diff --git a/projects/clr/hipamd/src/hip_memory.cpp b/projects/clr/hipamd/src/hip_memory.cpp index 479040c099..29315fa09d 100644 --- a/projects/clr/hipamd/src/hip_memory.cpp +++ b/projects/clr/hipamd/src/hip_memory.cpp @@ -1024,6 +1024,27 @@ hipError_t hipMemGetInfo (size_t *free, size_t *total) return ihipLogStatus(e); } +hipError_t hipMemPtrGetInfo(void *ptr, size_t *size) +{ + HIP_INIT_API(ptr, size); + + hipError_t e = hipSuccess; + + if(ptr != nullptr && size != nullptr){ + hc::accelerator acc; + hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0); + am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, ptr); + if(status == AM_SUCCESS){ + *size = amPointerInfo._sizeBytes; + }else{ + e = hipErrorInvalidValue; + } + }else{ + e = hipErrorInvalidValue; + } + return ihipLogStatus(e); +} + hipError_t hipFree(void* ptr) { HIP_INIT_API(ptr); diff --git a/projects/clr/hipamd/tests/src/runtimeApi/memory/hipHostGetFlags.cpp b/projects/clr/hipamd/tests/src/runtimeApi/memory/hipHostGetFlags.cpp index a989b879ac..9fad60aec8 100644 --- a/projects/clr/hipamd/tests/src/runtimeApi/memory/hipHostGetFlags.cpp +++ b/projects/clr/hipamd/tests/src/runtimeApi/memory/hipHostGetFlags.cpp @@ -1,5 +1,5 @@ /* -Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/projects/clr/hipamd/tests/src/runtimeApi/memory/hipMemPtrGetInfo.cpp b/projects/clr/hipamd/tests/src/runtimeApi/memory/hipMemPtrGetInfo.cpp new file mode 100644 index 0000000000..5aa0072199 --- /dev/null +++ b/projects/clr/hipamd/tests/src/runtimeApi/memory/hipMemPtrGetInfo.cpp @@ -0,0 +1,52 @@ +/* +Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp + * RUN: %t + * HIT_END + */ + +#include"test_common.h" + +struct { + float a; + int b; + void *c; +} Struct ; + +int main(){ + int *iPtr; + float *fPtr; + struct Struct *sPtr; + size_t sSetSize = 1024, sGetSize; + hipMalloc(&iPtr, sSetSize); + hipMalloc(&fPtr, sSetSize); + hipMalloc(&sPtr, sSetSize); + hipMemPtrGetInfo(iPtr, &sGetSize); + assert(sGetSize == sSetSize); + hipMemPtrGetInfo(fPtr, &sGetSize); + assert(sGetSize == sSetSize); + hipMemPtrGetInfo(sPtr, &sGetSize); + assert(sGetSize == sSetSize); + passed(); +} From c0d91d1c3ac70f1bc06a19ecb1b12add5c70688c Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Tue, 7 Mar 2017 14:06:03 -0600 Subject: [PATCH 03/23] fixed atan2f arguments Change-Id: I0bb621e94d57594c3899e51d0c34ef43306cead0 [ROCm/clr commit: 2ea7c5d28a2de155368767e2d3338c54c6039624] --- projects/clr/hipamd/src/math_functions.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/clr/hipamd/src/math_functions.cpp b/projects/clr/hipamd/src/math_functions.cpp index a1ee9d3ce5..230eb2aacc 100644 --- a/projects/clr/hipamd/src/math_functions.cpp +++ b/projects/clr/hipamd/src/math_functions.cpp @@ -46,7 +46,7 @@ __device__ float asinhf(float x) } __device__ float atan2f(float y, float x) { - return hc::precise_math::atan2f(x, y); + return hc::precise_math::atan2f(y, x); } __device__ float atanf(float x) { From 27cf10c0949c28f9d5155118ad5085bcb3f57f52 Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Wed, 8 Mar 2017 16:16:08 +0530 Subject: [PATCH 04/23] Disable hipMemPtrGetInfo test on nvcc path Change-Id: I864e571314abfe5ae614e6792c86d7b457c920ee [ROCm/clr commit: 8bd20732f9e3525eea75c112271737731be235eb] --- .../clr/hipamd/tests/src/runtimeApi/memory/hipMemPtrGetInfo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/clr/hipamd/tests/src/runtimeApi/memory/hipMemPtrGetInfo.cpp b/projects/clr/hipamd/tests/src/runtimeApi/memory/hipMemPtrGetInfo.cpp index 5aa0072199..1f78b4afab 100644 --- a/projects/clr/hipamd/tests/src/runtimeApi/memory/hipMemPtrGetInfo.cpp +++ b/projects/clr/hipamd/tests/src/runtimeApi/memory/hipMemPtrGetInfo.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../../test_common.cpp + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc * RUN: %t * HIT_END */ From 0710dbde5503ae8d06522ef01086214f4d80a3cf Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Wed, 8 Mar 2017 13:49:50 -0600 Subject: [PATCH 05/23] Fix bug in hipModuleGetFunction. Modules with > 1 function didn't return the function correctly. Also fix coding convention issues [ROCm/clr commit: 09df0977c01be8b93d24d706993c32ad0ee88137] --- projects/clr/hipamd/src/hip_module.cpp | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/projects/clr/hipamd/src/hip_module.cpp b/projects/clr/hipamd/src/hip_module.cpp index f21adf9691..1f20a47c13 100644 --- a/projects/clr/hipamd/src/hip_module.cpp +++ b/projects/clr/hipamd/src/hip_module.cpp @@ -218,31 +218,33 @@ hipError_t hipModuleUnload(hipModule_t hmod) { ret = hipErrorInvalidValue; } - for(std::list::iterator f = hmod->funcTrack.begin(); f != hmod->funcTrack.end(); ++f) { + for(auto f = hmod->funcTrack.begin(); f != hmod->funcTrack.end(); ++f) { delete *f; } delete hmod; return ihipLogStatus(ret); } -hipError_t ihipModuleGetSymbol(hipFunction_t *func, hipModule_t hmod, const char *name){ + +hipError_t ihipModuleGetSymbol(hipFunction_t *func, hipModule_t hmod, const char *name) +{ auto ctx = ihipGetTlsDefaultCtx(); hipError_t ret = hipSuccess; - if(name == nullptr){ + if (name == nullptr){ return ihipLogStatus(hipErrorInvalidValue); } - if(ctx == nullptr){ + if (ctx == nullptr){ ret = hipErrorInvalidContext; - }else{ + } else { std::string str(name); - for(std::list::iterator f = hmod->funcTrack.begin(); f != hmod->funcTrack.end(); ++f) { - if((*f)->_name == str) { - *func = *f; - } - return ret; + for(auto f = hmod->funcTrack.begin(); f != hmod->funcTrack.end(); ++f) { + if((*f)->_name == str) { + *func = *f; + return ret; + } } ihipModuleSymbol_t *sym = new ihipModuleSymbol_t; int deviceId = ctx->getDevice()->_deviceId; From 3152223be6d436784ef78832c2caf7be3727a743 Mon Sep 17 00:00:00 2001 From: pensun Date: Wed, 8 Mar 2017 14:06:09 -0600 Subject: [PATCH 06/23] add inline to all hip_complex operators Change-Id: Ifba5966c297cbc9299c39ecfc45c7296003ebb5d [ROCm/clr commit: 14a5d3c80db360e71e9fb90715f23f08282e8c50] --- .../clr/hipamd/include/hip/hcc_detail/hip_complex.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/projects/clr/hipamd/include/hip/hcc_detail/hip_complex.h b/projects/clr/hipamd/include/hip/hcc_detail/hip_complex.h index d4fea7f034..f50a601b90 100644 --- a/projects/clr/hipamd/include/hip/hcc_detail/hip_complex.h +++ b/projects/clr/hipamd/include/hip/hcc_detail/hip_complex.h @@ -28,7 +28,7 @@ THE SOFTWARE. #if __cplusplus #define COMPLEX_ADD_OP_OVERLOAD(type) \ -__device__ __host__ static type operator + (const type& lhs, const type& rhs) { \ +__device__ __host__ static inline type operator + (const type& lhs, const type& rhs) { \ type ret; \ ret.x = lhs.x + rhs.x ; \ ret.y = lhs.y + rhs.y ; \ @@ -36,7 +36,7 @@ __device__ __host__ static type operator + (const type& lhs, const type& rhs) { } #define COMPLEX_SUB_OP_OVERLOAD(type) \ -__device__ __host__ static type operator - (const type& lhs, const type& rhs) { \ +__device__ __host__ static inline type operator - (const type& lhs, const type& rhs) { \ type ret; \ ret.x = lhs.x - rhs.x; \ ret.y = lhs.y - rhs.y; \ @@ -44,7 +44,7 @@ __device__ __host__ static type operator - (const type& lhs, const type& rhs) { } #define COMPLEX_MUL_OP_OVERLOAD(type) \ -__device__ __host__ static type operator * (const type& lhs, const type& rhs) { \ +__device__ __host__ static inline type operator * (const type& lhs, const type& rhs) { \ type ret; \ ret.x = lhs.x * rhs.x - lhs.y * rhs.y; \ ret.y = lhs.x * rhs.y + lhs.y * rhs.x; \ @@ -52,7 +52,7 @@ __device__ __host__ static type operator * (const type& lhs, const type& rhs) { } #define COMPLEX_DIV_OP_OVERLOAD(type) \ -__device__ __host__ static type operator / (const type& lhs, const type& rhs) { \ +__device__ __host__ static inline type operator / (const type& lhs, const type& rhs) { \ type ret; \ ret.x = (lhs.x * rhs.x + lhs.y * rhs.y); \ ret.y = (rhs.x * lhs.y - lhs.x * rhs.y); \ @@ -88,7 +88,7 @@ __device__ __host__ static inline type& operator /= (type& lhs, const type& rhs) } #define COMPLEX_SCALAR_PRODUCT(type, type1) \ -__device__ __host__ static type operator * (const type& lhs, type1 rhs) { \ +__device__ __host__ static inline type operator * (const type& lhs, type1 rhs) { \ type ret; \ ret.x = lhs.x * rhs; \ ret.y = lhs.y * rhs; \ From f4e9b51d35522236df7447734fd5a8b39d7a2be2 Mon Sep 17 00:00:00 2001 From: pensun Date: Wed, 8 Mar 2017 23:37:50 -0600 Subject: [PATCH 07/23] fix typo in hip_porting_guide Change-Id: I42553d9a4de2901dfdd57384b52a04e8fb22edde [ROCm/clr commit: 308638c9117883b79a97254e820194229aa68724] --- projects/clr/hipamd/docs/markdown/hip_porting_guide.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/clr/hipamd/docs/markdown/hip_porting_guide.md b/projects/clr/hipamd/docs/markdown/hip_porting_guide.md index 0acdc246f9..721a6fabd7 100644 --- a/projects/clr/hipamd/docs/markdown/hip_porting_guide.md +++ b/projects/clr/hipamd/docs/markdown/hip_porting_guide.md @@ -166,7 +166,7 @@ Both nvcc and hcc make two passes over the code: one for host code and one for d ``` // #ifdef __CUDA_ARCH__ -#ifdef __HIP_DEVICE_COMPILE__ && (__HIP_DEVICE_COMPILE__ == 1) +#if defined(__HIP_DEVICE_COMPILE__) && (__HIP_DEVICE_COMPILE__ == 1) ``` Unlike `__CUDA_ARCH__`, the `__HIP_DEVICE_COMPILE__` value is 0 or 1, and it doesn’t represent the feature capability of the target device. From 974cb587a83c07c11ce4a053d966e64aefcd5a02 Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Thu, 9 Mar 2017 08:52:50 -0600 Subject: [PATCH 08/23] make 4_shfl cookbook sample only for fiji 1. __shfl is not supported on hawaii gfx701 Change-Id: Iac09f5d30ee0674b8f58a6e74ec5c49b02be32ad [ROCm/clr commit: 7f4b24886faccc63e66889e665d74b5818f46fb6] --- projects/clr/hipamd/samples/2_Cookbook/4_shfl/Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/projects/clr/hipamd/samples/2_Cookbook/4_shfl/Makefile b/projects/clr/hipamd/samples/2_Cookbook/4_shfl/Makefile index 3383cf2bf5..21c0e93959 100644 --- a/projects/clr/hipamd/samples/2_Cookbook/4_shfl/Makefile +++ b/projects/clr/hipamd/samples/2_Cookbook/4_shfl/Makefile @@ -22,7 +22,7 @@ CXX=$(HIPCC) $(EXECUTABLE): $(OBJECTS) - $(HIPCC) $(OBJECTS) -o $@ + $(HIPCC) --amdgpu-target=gfx803 $(OBJECTS) -o $@ test: $(EXECUTABLE) @@ -33,4 +33,3 @@ clean: rm -f $(EXECUTABLE) rm -f $(OBJECTS) rm -f $(HIP_PATH)/src/*.o - From 4dd856eb67fa6e8279d711e78e951f9739d2a1b6 Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Fri, 10 Mar 2017 10:29:52 +0530 Subject: [PATCH 09/23] Fix for HCSWAP-128, make 5_2dshfl cookbook sample only for fiji Change-Id: I8869c28151bca1bd47a053a2808e93a801d16d00 [ROCm/clr commit: d48943699dedac799be4166e7bc476ad058b89dd] --- projects/clr/hipamd/samples/2_Cookbook/5_2dshfl/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/clr/hipamd/samples/2_Cookbook/5_2dshfl/Makefile b/projects/clr/hipamd/samples/2_Cookbook/5_2dshfl/Makefile index b742bbf80a..6abaf658b1 100644 --- a/projects/clr/hipamd/samples/2_Cookbook/5_2dshfl/Makefile +++ b/projects/clr/hipamd/samples/2_Cookbook/5_2dshfl/Makefile @@ -22,7 +22,7 @@ CXX=$(HIPCC) $(EXECUTABLE): $(OBJECTS) - $(HIPCC) $(OBJECTS) -o $@ + $(HIPCC) --amdgpu-target=gfx803 $(OBJECTS) -o $@ test: $(EXECUTABLE) From a816a9eb54a0d7cbc6d5574acccfb3328f10f661 Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Fri, 10 Mar 2017 08:40:59 -0600 Subject: [PATCH 10/23] Added architecture guards around __shfl, dpp and ds_permute device functions Change-Id: I10f9b08618fbf25b61c1932278fc5759e41c0d66 [ROCm/clr commit: 046ec0375b280098719fbd0544da7dbc5488c59b] --- .../include/hip/hcc_detail/hip_runtime.h | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime.h b/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime.h index 67c63103d3..6acc604909 100644 --- a/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime.h +++ b/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime.h @@ -226,6 +226,8 @@ __device__ int __all( int input); __device__ int __any( int input); __device__ unsigned long long int __ballot( int input); +#if __HIP_ARCH_GFX803__ == 1 + // warp shuffle functions #ifdef __cplusplus __device__ int __shfl(int input, int lane, int width=warpSize); @@ -247,6 +249,18 @@ __device__ float __shfl_down(float input, unsigned int lane_delta, int width); __device__ float __shfl_xor(float input, int lane_mask, int width); #endif +__device__ unsigned __hip_ds_bpermute(int index, unsigned src); +__device__ float __hip_ds_bpermutef(int index, float src); +__device__ unsigned __hip_ds_permute(int index, unsigned src); +__device__ float __hip_ds_permutef(int index, float src); + +__device__ unsigned __hip_ds_swizzle(unsigned int src, int pattern); +__device__ float __hip_ds_swizzlef(float src, int pattern); + +__device__ int __hip_move_dpp(int src, int dpp_ctrl, int row_mask, int bank_mask, bool bound_ctrl); + +#endif + __host__ __device__ int min(int arg1, int arg2); __host__ __device__ int max(int arg1, int arg2); @@ -321,16 +335,6 @@ __device__ static inline void __threadfence(void) { //__device__ void __threadfence_system(void) __attribute__((deprecated("Provided with workaround configuration, see hip_kernel_language.md for details"))); __device__ void __threadfence_system(void) ; -__device__ unsigned __hip_ds_bpermute(int index, unsigned src); -__device__ float __hip_ds_bpermutef(int index, float src); -__device__ unsigned __hip_ds_permute(int index, unsigned src); -__device__ float __hip_ds_permutef(int index, float src); - -__device__ unsigned __hip_ds_swizzle(unsigned int src, int pattern); -__device__ float __hip_ds_swizzlef(float src, int pattern); - -__device__ int __hip_move_dpp(int src, int dpp_ctrl, int row_mask, int bank_mask, bool bound_ctrl); - // doxygen end Fence Fence /** * @} From a49f5c7e1d6f4e5c553b9308991ebbfa4968e742 Mon Sep 17 00:00:00 2001 From: pensun Date: Thu, 9 Mar 2017 16:30:34 -0600 Subject: [PATCH 11/23] update porting guide for updated __HIP_DEVICE_COMPILE__ Change-Id: I0f025d354f76e2d728231bf112a77e8c8fcacc8c [ROCm/clr commit: 1a2844e3a3415ba6b1befe7630b615b4dcd962c7] --- .../hipamd/docs/markdown/hip_porting_guide.md | 6 +++--- projects/clr/hipamd/include/hip/hip_common.h | 17 ++++------------- 2 files changed, 7 insertions(+), 16 deletions(-) diff --git a/projects/clr/hipamd/docs/markdown/hip_porting_guide.md b/projects/clr/hipamd/docs/markdown/hip_porting_guide.md index 721a6fabd7..9f20d12423 100644 --- a/projects/clr/hipamd/docs/markdown/hip_porting_guide.md +++ b/projects/clr/hipamd/docs/markdown/hip_porting_guide.md @@ -166,10 +166,10 @@ Both nvcc and hcc make two passes over the code: one for host code and one for d ``` // #ifdef __CUDA_ARCH__ -#if defined(__HIP_DEVICE_COMPILE__) && (__HIP_DEVICE_COMPILE__ == 1) +#if __HIP_DEVICE_COMPILE__ ``` -Unlike `__CUDA_ARCH__`, the `__HIP_DEVICE_COMPILE__` value is 0 or 1, and it doesn’t represent the feature capability of the target device. +Unlike `__CUDA_ARCH__`, the `__HIP_DEVICE_COMPILE__` value is 1 or undefined, and it doesn’t represent the feature capability of the target device. ### Compiler Defines: Summary @@ -178,7 +178,7 @@ Unlike `__CUDA_ARCH__`, the `__HIP_DEVICE_COMPILE__` value is 0 or 1, and it doe |HIP-related defines:| |`__HIP_PLATFORM_HCC___`| Defined | Undefined | Defined if targeting hcc platform; undefined otherwise | |`__HIP_PLATFORM_NVCC___`| Undefined | Defined | Defined if targeting nvcc platform; undefined otherwise | -|`__HIP_DEVICE_COMPILE__` | 1 if compiling for device; 0 if compiling for host |1 if compiling for device; 0 if compiling for host | Undefined +|`__HIP_DEVICE_COMPILE__` | 1 if compiling for device; undefined if compiling for host |1 if compiling for device; undefined if compiling for host | Undefined |`__HIPCC__` | Defined | Defined | Undefined |`__HIP_ARCH_*` | 0 or 1 depending on feature support (see below) | 0 or 1 depending on feature support (see below) | 0 |nvcc-related defines:| diff --git a/projects/clr/hipamd/include/hip/hip_common.h b/projects/clr/hipamd/include/hip/hip_common.h index 6223a2fe9e..6317a792ee 100644 --- a/projects/clr/hipamd/include/hip/hip_common.h +++ b/projects/clr/hipamd/include/hip/hip_common.h @@ -27,13 +27,6 @@ THE SOFTWARE. // Other compiler (GCC,ICC,etc) need to set one of these macros explicitly #if defined(__HCC__) #define __HIP_PLATFORM_HCC__ - -#if defined(__HCC_ACCELERATOR__) && (__HCC_ACCELERATOR__ != 0) -#define __HIP_DEVICE_COMPILE__ 1 -#else -#define __HIP_DEVICE_COMPILE__ 0 -#endif - #endif //__HCC__ // Auto enable __HIP_PLATFORM_NVCC__ if compiling with NVCC @@ -43,14 +36,12 @@ THE SOFTWARE. #define __HIPCC__ #endif -#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ != 0) -#define __HIP_DEVICE_COMPILE__ 1 -#else -#define __HIP_DEVICE_COMPILE__ 0 -#endif - #endif //__NVCC__ +// Auto enable __HIP_DEVICE_COMPILE__ if compiled in HCC or NVCC device path +#if (defined(__HCC_ACCELERATOR__) && __HCC_ACCELERATOR__ != 0) || (defined(__CUDA_ARCH__) && __CUDA_ARCH__ != 0) + #define __HIP_DEVICE_COMPILE__ 1 +#endif #if __HIP_DEVICE_COMPILE__ == 0 // 32-bit Atomics From b2b83617635a9423cff655a1a7663a84d1e1806e Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Fri, 10 Mar 2017 23:45:28 +0530 Subject: [PATCH 12/23] IPC supported using ROCR APIs Change-Id: I0a353b1240098f4b20fa266a871f5f5826290af9 [ROCm/clr commit: 3af487007b520c78bf32c16c5f9840eaa28058a4] --- .../include/hip/hcc_detail/hip_runtime_api.h | 7 ++++++- projects/clr/hipamd/src/hip_hcc.h | 6 +++--- projects/clr/hipamd/src/hip_memory.cpp | 19 ++++++++++--------- 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime_api.h b/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime_api.h index fb8535cfec..080f82d9ed 100644 --- a/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime_api.h +++ b/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime_api.h @@ -62,7 +62,12 @@ typedef struct ihipStream_t *hipStream_t; #define hipIpcMemLazyEnablePeerAccess 0 -typedef struct ihipIpcMemHandle_t *hipIpcMemHandle_t; +#define HIP_IPC_HANDLE_SIZE 64 + +typedef struct hipIpcMemHandle_st +{ + char reserved[HIP_IPC_HANDLE_SIZE]; +}hipIpcMemHandle_t; //TODO: IPC event handle currently unsupported struct ihipIpcEventHandle_t; diff --git a/projects/clr/hipamd/src/hip_hcc.h b/projects/clr/hipamd/src/hip_hcc.h index 105eef6bb8..b23aead072 100644 --- a/projects/clr/hipamd/src/hip_hcc.h +++ b/projects/clr/hipamd/src/hip_hcc.h @@ -36,7 +36,7 @@ THE SOFTWARE. #error("This version of HIP requires a newer version of HCC."); #endif -#define USE_IPC 0 +#define USE_IPC 1 //--- // Environment variables: @@ -326,15 +326,15 @@ const hipStream_t hipStreamNull = 0x0; /** * HIP IPC Handle Size */ -#define HIP_IPC_HANDLE_SIZE 64 +#define HIP_IPC_RESERVED_SIZE 24 class ihipIpcMemHandle_t { public: #if USE_IPC hsa_amd_ipc_memory_t ipc_handle; ///< ipc memory handle on ROCr #endif - char reserved[HIP_IPC_HANDLE_SIZE]; size_t psize; + char reserved[HIP_IPC_RESERVED_SIZE]; }; diff --git a/projects/clr/hipamd/src/hip_memory.cpp b/projects/clr/hipamd/src/hip_memory.cpp index 29315fa09d..df11205344 100644 --- a/projects/clr/hipamd/src/hip_memory.cpp +++ b/projects/clr/hipamd/src/hip_memory.cpp @@ -1143,7 +1143,7 @@ hipError_t hipMemGetAddressRange ( hipDeviceptr_t* pbase, size_t* psize, hipDevi } else hipStatus = hipErrorInvalidDevicePointer; - return hipStatus; + return ihipLogStatus(hipStatus); } @@ -1162,25 +1162,25 @@ hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* devPtr){ } else hipStatus = hipErrorInvalidResourceHandle; - + ihipIpcMemHandle_t* iHandle = (ihipIpcMemHandle_t*) handle; // Save the size of the pointer to hipIpcMemHandle - (*handle)->psize = psize; + iHandle->psize = psize; #if USE_IPC // Create HSA ipc memory hsa_status_t hsa_status = - hsa_amd_ipc_memory_create(devPtr, psize, &(*handle)->ipc_handle); + hsa_amd_ipc_memory_create(devPtr, psize, (hsa_amd_ipc_memory_t*) &(iHandle->ipc_handle)); if(hsa_status!= HSA_STATUS_SUCCESS) hipStatus = hipErrorMemoryAllocation; #else hipStatus = hipErrorRuntimeOther; #endif - return hipStatus; + return ihipLogStatus(hipStatus); } hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle, unsigned int flags){ -// HIP_INIT_API ( devPtr, handle.handle , flags); + HIP_INIT_API ( devPtr, &handle , flags); hipError_t hipStatus = hipSuccess; #if USE_IPC @@ -1190,15 +1190,16 @@ hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle, unsigned if(!agent) return hipErrorInvalidResourceHandle; + ihipIpcMemHandle_t* iHandle = (ihipIpcMemHandle_t*) &handle; //Attach ipc memory hsa_status_t hsa_status = - hsa_amd_ipc_memory_attach(&handle->ipc_handle, handle->psize, 1, agent, devPtr); + hsa_amd_ipc_memory_attach((hsa_amd_ipc_memory_t*)&(iHandle->ipc_handle), iHandle->psize, 1, agent, devPtr); if(hsa_status != HSA_STATUS_SUCCESS) hipStatus = hipErrorMapBufferObjectFailed; #else hipStatus = hipErrorRuntimeOther; #endif - return hipStatus; + return ihipLogStatus(hipStatus); } hipError_t hipIpcCloseMemHandle(void *devPtr){ @@ -1213,7 +1214,7 @@ hipError_t hipIpcCloseMemHandle(void *devPtr){ #else hipStatus = hipErrorRuntimeOther; #endif - return hipStatus; + return ihipLogStatus(hipStatus); } // hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle){ From c8a1b6bc3020e3dfee8fa9a469670d6d9668a9cb Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Fri, 10 Mar 2017 15:14:26 -0600 Subject: [PATCH 13/23] fixed warning raised by g++ using hip_vector_types.h Change-Id: I9e7cdfc8b28b03b690eecd068529cf7629296d68 [ROCm/clr commit: a47066153f6245f0c7e58b3e5979e233dbd6c58f] --- .../include/hip/hcc_detail/hip_vector_types.h | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/projects/clr/hipamd/include/hip/hcc_detail/hip_vector_types.h b/projects/clr/hipamd/include/hip/hcc_detail/hip_vector_types.h index cd5a09215a..8e6ec49511 100644 --- a/projects/clr/hipamd/include/hip/hcc_detail/hip_vector_types.h +++ b/projects/clr/hipamd/include/hip/hcc_detail/hip_vector_types.h @@ -1260,7 +1260,7 @@ __device__ __host__ static inline type& operator op (type& val) { \ } #define DECLOP_1VAR_POSTOP(type, op) \ -__device__ __host__ static inline type operator op (type& val, int i) { \ +__device__ __host__ static inline type operator op (type& val, int) { \ type ret; \ ret.x = val.x; \ val.x op; \ @@ -1326,7 +1326,7 @@ __device__ __host__ static inline type& operator op (type& val) { \ } #define DECLOP_2VAR_POSTOP(type, op) \ -__device__ __host__ static inline type operator op (type& val, int i) { \ +__device__ __host__ static inline type operator op (type& val, int) { \ type ret; \ ret.x = val.x; \ ret.y = val.y; \ @@ -1337,7 +1337,7 @@ __device__ __host__ static inline type operator op (type& val, int i) { \ #define DECLOP_2VAR_COMP(type, op) \ __device__ __host__ static inline bool operator op (type& lhs, type& rhs) { \ - return lhs.x op rhs.x && lhs.y op rhs.y; \ + return (lhs.x op rhs.x) && (lhs.y op rhs.y); \ } #define DECLOP_2VAR_1IN_1OUT(type, op) \ @@ -1350,7 +1350,7 @@ __device__ __host__ static inline type operator op(type &rhs) { \ #define DECLOP_2VAR_1IN_BOOLOUT(type, op) \ __device__ __host__ static inline bool operator op (type &rhs) { \ - return op rhs.x && op rhs.y; \ + return (op rhs.x) && (op rhs.y); \ } @@ -1401,7 +1401,7 @@ __device__ __host__ static inline type& operator op (type& val) { \ } #define DECLOP_3VAR_POSTOP(type, op) \ -__device__ __host__ static inline type operator op (type& val, int i) { \ +__device__ __host__ static inline type operator op (type& val, int) { \ type ret; \ ret.x = val.x; \ ret.y = val.y; \ @@ -1414,7 +1414,7 @@ __device__ __host__ static inline type operator op (type& val, int i) { \ #define DECLOP_3VAR_COMP(type, op) \ __device__ __host__ static inline bool operator op (type& lhs, type& rhs) { \ - return lhs.x op rhs.x && lhs.y op rhs.y && lhs.z op rhs.z; \ + return (lhs.x op rhs.x) && (lhs.y op rhs.y) && (lhs.z op rhs.z); \ } #define DECLOP_3VAR_1IN_1OUT(type, op) \ @@ -1428,7 +1428,7 @@ __device__ __host__ static inline type operator op(type &rhs) { \ #define DECLOP_3VAR_1IN_BOOLOUT(type, op) \ __device__ __host__ static inline bool operator op (type &rhs) { \ - return op rhs.x && op rhs.y && op rhs.z; \ + return (op rhs.x) && (op rhs.y) && (op rhs.z); \ } @@ -1484,7 +1484,7 @@ __device__ __host__ static inline type& operator op (type& val) { \ } #define DECLOP_4VAR_POSTOP(type, op) \ -__device__ __host__ static inline type operator op (type& val, int i) { \ +__device__ __host__ static inline type operator op (type& val, int) { \ type ret; \ ret.x = val.x; \ ret.y = val.y; \ @@ -1499,7 +1499,7 @@ __device__ __host__ static inline type operator op (type& val, int i) { \ #define DECLOP_4VAR_COMP(type, op) \ __device__ __host__ static inline bool operator op (type& lhs, type& rhs) { \ - return lhs.x op rhs.x && lhs.y op rhs.y && lhs.z op rhs.z && lhs.w op rhs.w; \ + return (lhs.x op rhs.x) && (lhs.y op rhs.y) && (lhs.z op rhs.z) && (lhs.w op rhs.w); \ } #define DECLOP_4VAR_1IN_1OUT(type, op) \ @@ -1514,7 +1514,7 @@ __device__ __host__ static inline type operator op(type &rhs) { \ #define DECLOP_4VAR_1IN_BOOLOUT(type, op) \ __device__ __host__ static inline bool operator op (type &rhs) { \ - return op rhs.x && op rhs.y && op rhs.z && op rhs.w; \ + return (op rhs.x) && (op rhs.y) && (op rhs.z) && (op rhs.w); \ } From c8595b0373c8b1456d76ad6586446fe1e9b9dcf1 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Fri, 17 Feb 2017 11:53:38 -0600 Subject: [PATCH 14/23] Fix copying of registered memory. Set device properly so copying can recover context. Enhance test to catch this case. [ROCm/clr commit: 23a58775df926ca21323b37329c8ccdacfae82c7] --- projects/clr/hipamd/src/hip_memory.cpp | 2 ++ .../src/runtimeApi/memory/hipHostRegister.cpp | 21 +++++++++++++------ 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/projects/clr/hipamd/src/hip_memory.cpp b/projects/clr/hipamd/src/hip_memory.cpp index df11205344..314dee97dc 100644 --- a/projects/clr/hipamd/src/hip_memory.cpp +++ b/projects/clr/hipamd/src/hip_memory.cpp @@ -194,6 +194,7 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) if(sizeBytes < 1 && (*ptr == NULL)){ hip_status = hipErrorMemoryAllocation; } else { + // TODO - should OR in flags here? hc::am_memtracker_update(*ptr, device->_deviceId, amHostCoherent); } tprintf(DB_MEM, " %s: finegrained system memory ptr=%p\n", __func__, *ptr); @@ -403,6 +404,7 @@ hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags) vecAcc.push_back(ihipGetDevice(i)->_acc); } am_status = hc::am_memory_host_lock(device->_acc, hostPtr, sizeBytes, &vecAcc[0], vecAcc.size()); + hc::am_memtracker_update(hostPtr, device->_deviceId, flags); tprintf(DB_MEM, " %s registered ptr=%p\n", __func__, hostPtr); if(am_status == AM_SUCCESS){ diff --git a/projects/clr/hipamd/tests/src/runtimeApi/memory/hipHostRegister.cpp b/projects/clr/hipamd/tests/src/runtimeApi/memory/hipHostRegister.cpp index 37ee9b1b78..6c81ec0d91 100644 --- a/projects/clr/hipamd/tests/src/runtimeApi/memory/hipHostRegister.cpp +++ b/projects/clr/hipamd/tests/src/runtimeApi/memory/hipHostRegister.cpp @@ -39,21 +39,30 @@ int main(){ const size_t size = N * sizeof(float); A = (float*)malloc(size); HIPCHECK(hipHostRegister(A, size, 0)); + + for(int i=0;i Date: Fri, 17 Feb 2017 15:43:09 -0600 Subject: [PATCH 15/23] Update hipHostRegister debug and pointerTracker debug and notes [ROCm/clr commit: e43592721ee48c8cfd800cebd968a326a50dd137] --- projects/clr/hipamd/src/hip_hcc.cpp | 40 +++++++++++------ projects/clr/hipamd/src/hip_memory.cpp | 22 +++++++++- .../src/runtimeApi/memory/hipHostRegister.cpp | 43 ++++++++++++++++--- 3 files changed, 87 insertions(+), 18 deletions(-) diff --git a/projects/clr/hipamd/src/hip_hcc.cpp b/projects/clr/hipamd/src/hip_hcc.cpp index fec5bb6a8a..760f46046a 100644 --- a/projects/clr/hipamd/src/hip_hcc.cpp +++ b/projects/clr/hipamd/src/hip_hcc.cpp @@ -1803,6 +1803,20 @@ void ihipStream_t::resolveHcMemcpyDirection(unsigned hipMemKind, } +void printPointerInfo(unsigned dbFlag, const char *tag, const void *ptr, const hc::AmPointerInfo &ptrInfo) +{ + tprintf (dbFlag, " %s=%p baseHost=%p baseDev=%p sz=%zu home_dev=%d tracked=%d isDevMem=%d registered=%d\n", + tag, ptr, + ptrInfo._hostPointer, ptrInfo._devicePointer, ptrInfo._sizeBytes, + ptrInfo._appId, ptrInfo._sizeBytes != 0, ptrInfo._isInDeviceMem, !ptrInfo._isAmManaged); +} + + +// TODO : For registered and host memory, if the portable flag is set, we need to recognize that and perform appropriate copy operation. +// What can happen now is that Portable memory is mapped into multiple devices but Peer access is not enabled. i +// The peer detection logic doesn't see that the memory is already mapped and so tries to use an unpinned copy algorithm. If this is PinInPlace, then an error can occur. +// Need to track Portable flag correctly or use new RT functionality to query the peer status for the pointer. +// // TODO - remove kind parm from here or use it below? void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes, unsigned kind, bool resolveOn) { @@ -1819,6 +1833,16 @@ void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes, bool dstTracked = (hc::am_memtracker_getinfo(&dstPtrInfo, dst) == AM_SUCCESS); bool srcTracked = (hc::am_memtracker_getinfo(&srcPtrInfo, src) == AM_SUCCESS); + + // Some code in HCC and in printPointerInfo uses _sizeBytes==0 as an indication ptr is not valid, so check it here: + if (!dstTracked) { + assert (dstPtrInfo._sizeBytes == 0); + } + if (!srcTracked) { + assert (srcPtrInfo._sizeBytes == 0); + } + + hc::hcCommandKind hcCopyDir; ihipCtx_t *copyDevice; bool forceUnpinnedCopy; @@ -1831,12 +1855,8 @@ void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes, dst, dstPtrInfo._appId, dstPtrInfo._isInDeviceMem, src, srcPtrInfo._appId, srcPtrInfo._isInDeviceMem, sizeBytes, hcMemcpyStr(hcCopyDir), forceUnpinnedCopy); - tprintf (DB_COPY, " dst=%p baseHost=%p baseDev=%p sz=%zu home_dev=%d tracked=%d isDevMem=%d\n", - dst, dstPtrInfo._hostPointer, dstPtrInfo._devicePointer, dstPtrInfo._sizeBytes, - dstPtrInfo._appId, dstTracked, dstPtrInfo._isInDeviceMem); - tprintf (DB_COPY, " src=%p baseHost=%p baseDev=%p sz=%zu home_dev=%d tracked=%d isDevMem=%d\n", - src, srcPtrInfo._hostPointer, srcPtrInfo._devicePointer, srcPtrInfo._sizeBytes, - srcPtrInfo._appId, srcTracked, srcPtrInfo._isInDeviceMem); + printPointerInfo(DB_COPY, " dst", dst, dstPtrInfo); + printPointerInfo(DB_COPY, " src", src, srcPtrInfo); this->ensureHaveQueue(crit); @@ -1921,12 +1941,8 @@ void ihipStream_t::locked_copyAsync(void* dst, const void* src, size_t sizeBytes dst, dstPtrInfo._appId, dstPtrInfo._isInDeviceMem, src, srcPtrInfo._appId, srcPtrInfo._isInDeviceMem, sizeBytes, hcMemcpyStr(hcCopyDir), forceUnpinnedCopy); - tprintf (DB_COPY, " dst=%p baseHost=%p baseDev=%p sz=%zu home_dev=%d tracked=%d isDevMem=%d\n", - dst, dstPtrInfo._hostPointer, dstPtrInfo._devicePointer, dstPtrInfo._sizeBytes, - dstPtrInfo._appId, dstTracked, dstPtrInfo._isInDeviceMem); - tprintf (DB_COPY, " src=%p baseHost=%p baseDev=%p sz=%zu home_dev=%d tracked=%d isDevMem=%d\n", - src, srcPtrInfo._hostPointer, srcPtrInfo._devicePointer, srcPtrInfo._sizeBytes, - srcPtrInfo._appId, srcTracked, srcPtrInfo._isInDeviceMem); + printPointerInfo(DB_COPY, " dst", dst, dstPtrInfo); + printPointerInfo(DB_COPY, " src", src, srcPtrInfo); // "tracked" really indicates if the pointer's virtual address is available in the GPU address space. // If both pointers are not tracked, we need to fall back to a sync copy. diff --git a/projects/clr/hipamd/src/hip_memory.cpp b/projects/clr/hipamd/src/hip_memory.cpp index 314dee97dc..b8a1d1646a 100644 --- a/projects/clr/hipamd/src/hip_memory.cpp +++ b/projects/clr/hipamd/src/hip_memory.cpp @@ -35,6 +35,14 @@ THE SOFTWARE. // Memory // // +// +//HIP uses several "app*" fields HC memory tracker to track state necessary for the HIP API. +//_appId : DeviceID. For device mem, this is device where the memory is physically allocated. +// For host or registered mem, this is the current device when the memory is allocated or registered. This device will have a GPUVM mapping for the host mem. +// +//_appAllocationFlags : These are flags provided by the user when allocation is performed. They are returned to user in hipHostGetFlags and other APIs. +// TODO - add more info here when available. +// hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attributes, void* ptr) { HIP_INIT_API(attributes, ptr); @@ -78,6 +86,7 @@ hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attributes, void* ptr) return ihipLogStatus(e); } + hipError_t hipHostGetDevicePointer(void **devicePointer, void *hostPointer, unsigned flags) { HIP_INIT_API(devicePointer, hostPointer, flags); @@ -102,6 +111,7 @@ hipError_t hipHostGetDevicePointer(void **devicePointer, void *hostPointer, unsi return ihipLogStatus(e); } + hipError_t hipMalloc(void** ptr, size_t sizeBytes) { HIP_INIT_API(ptr, sizeBytes); @@ -227,16 +237,20 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) return ihipLogStatus(hip_status); } +// Deprecated function: hipError_t hipMallocHost(void** ptr, size_t sizeBytes) { return hipHostMalloc(ptr, sizeBytes, 0); } + +// Deprecated function: hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags) { return hipHostMalloc(ptr, sizeBytes, flags); }; + // width in bytes hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height) { @@ -374,6 +388,8 @@ hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) return ihipLogStatus(hip_status); } + +// TODO - need to fix several issues here related to P2P access, host memory fallback. hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags) { HIP_INIT_API(hostPtr, sizeBytes, flags); @@ -406,7 +422,7 @@ hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags) am_status = hc::am_memory_host_lock(device->_acc, hostPtr, sizeBytes, &vecAcc[0], vecAcc.size()); hc::am_memtracker_update(hostPtr, device->_deviceId, flags); - tprintf(DB_MEM, " %s registered ptr=%p\n", __func__, hostPtr); + tprintf(DB_MEM, " %s registered ptr=%p and allowed access to %zu peers\n", __func__, hostPtr, vecAcc.size()); if(am_status == AM_SUCCESS){ hip_status = hipSuccess; } else { @@ -605,6 +621,7 @@ hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind return ihipLogStatus(e); } + hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t sizeBytes) { HIP_INIT_CMD_API(dst, src, sizeBytes); @@ -626,6 +643,7 @@ hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t sizeBytes) return ihipLogStatus(e); } + hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t sizeBytes) { HIP_INIT_CMD_API(dst, src, sizeBytes); @@ -647,6 +665,7 @@ hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t sizeBytes) return ihipLogStatus(e); } + hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes) { HIP_INIT_CMD_API(dst, src, sizeBytes); @@ -668,6 +687,7 @@ hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeByte return ihipLogStatus(e); } + hipError_t hipMemcpyHtoH(void* dst, void* src, size_t sizeBytes) { HIP_INIT_CMD_API(dst, src, sizeBytes); diff --git a/projects/clr/hipamd/tests/src/runtimeApi/memory/hipHostRegister.cpp b/projects/clr/hipamd/tests/src/runtimeApi/memory/hipHostRegister.cpp index 6c81ec0d91..eae73e1a65 100644 --- a/projects/clr/hipamd/tests/src/runtimeApi/memory/hipHostRegister.cpp +++ b/projects/clr/hipamd/tests/src/runtimeApi/memory/hipHostRegister.cpp @@ -45,17 +45,13 @@ int main(){ A[i] = float(1); } - // Copy to B, this should be optimal pinned malloc copy: - float *B; - HIPCHECK(hipMalloc(&B, size)); - HIPCHECK(hipMemcpy(B, A, size, hipMemcpyHostToDevice)); - for(int i=0;i Date: Fri, 17 Feb 2017 17:14:55 -0600 Subject: [PATCH 16/23] Add first step to a "registerd" mode in hipBusBandwidth. [ROCm/clr commit: f23b5a1f90536934a3e8706e45aa0d51b5545b1b] --- .../hipBusBandwidth/hipBusBandwidth.cpp | 163 +++++++++++------- 1 file changed, 104 insertions(+), 59 deletions(-) diff --git a/projects/clr/hipamd/samples/1_Utils/hipBusBandwidth/hipBusBandwidth.cpp b/projects/clr/hipamd/samples/1_Utils/hipBusBandwidth/hipBusBandwidth.cpp index 7cb3e7908e..a1b2fd1705 100644 --- a/projects/clr/hipamd/samples/1_Utils/hipBusBandwidth/hipBusBandwidth.cpp +++ b/projects/clr/hipamd/samples/1_Utils/hipBusBandwidth/hipBusBandwidth.cpp @@ -6,9 +6,12 @@ #include "ResultDatabase.h" +enum MallocMode {MallocPinned, MallocUnpinned, MallocRegistered}; + // Cmdline parms: bool p_verbose = false; -bool p_pinned = true; +MallocMode p_malloc_mode = MallocPinned; +int p_numa_ctl = -1; int p_iterations = 10; int p_beatsperiteration=1; int p_device = 0; @@ -21,7 +24,7 @@ bool p_h2d = true; bool p_d2h = true; bool p_bidir = true; - +#define NO_CHECK #define CHECK_HIP_ERROR() \ @@ -36,6 +39,14 @@ bool p_bidir = true; } +std::string mallocModeString(int mallocMode) { + switch (mallocMode) { + case MallocPinned : return "pinned"; + case MallocUnpinned: return "unpinned"; + case MallocRegistered: return "registered"; + default: return "mallocmode-UNKNOWN"; + }; +}; // **************************************************************************** int sizeToBytes(int size) { @@ -106,7 +117,7 @@ void RunBenchmark_H2D(ResultDatabase &resultDB) // Create some host memory pattern float *hostMem = NULL; - if (p_pinned) + if (p_malloc_mode == MallocPinned) { hipHostMalloc((void**)&hostMem, sizeof(float) * numMaxFloats); while (hipGetLastError() != hipSuccess) @@ -116,20 +127,29 @@ void RunBenchmark_H2D(ResultDatabase &resultDB) --nSizes; if (nSizes < 1) { - std::cerr << "Error: Couldn't allocated any pinned buffer\n"; + std::cerr << "Error: Couldn't allocate any pinned buffer\n"; return; } numMaxFloats = 1024 * (sizes[nSizes-1]) / 4; hipHostMalloc((void**)&hostMem, sizeof(float) * numMaxFloats); } } - else + else if (p_malloc_mode == MallocUnpinned) { if (p_alignedhost) { hostMem = (float*)aligned_alloc(p_alignedhost, numMaxFloats*sizeof(float)); } else { hostMem = new float[numMaxFloats]; } + } + else if (p_malloc_mode == MallocRegistered) + { + if (p_numa_ctl == -1) { + hostMem = (float*)malloc(numMaxFloats*sizeof(float)); + } + + hipHostRegister(hostMem, numMaxFloats * sizeof(float), 0); + CHECK_HIP_ERROR(); } for (int i = 0; i < numMaxFloats; i++) @@ -146,7 +166,7 @@ void RunBenchmark_H2D(ResultDatabase &resultDB) --nSizes; if (nSizes < 1) { - std::cerr << "Error: Couldn't allocated any device buffer\n"; + std::cerr << "Error: Couldn't allocate any device buffer\n"; return; } numMaxFloats = 1024 * (sizes[nSizes-1]) / 4; @@ -199,8 +219,8 @@ void RunBenchmark_H2D(ResultDatabase &resultDB) } else { sprintf(sizeStr, "%9s", sizeToString(thisSize).c_str()); } - resultDB.AddResult(std::string("H2D_Bandwidth") + (p_pinned ? "_Pinned" : "_Unpinned"), sizeStr, "GB/sec", speed); - resultDB.AddResult(std::string("H2D_Time") + (p_pinned ? "_Pinned" : "_Unpinned"), sizeStr, "ms", t); + resultDB.AddResult(std::string("H2D_Bandwidth") + "_" + mallocModeString(p_malloc_mode), sizeStr, "GB/sec", speed); + resultDB.AddResult(std::string("H2D_Time") + mallocModeString(p_malloc_mode), sizeStr, "ms", t); if (p_onesize) { break; @@ -212,6 +232,8 @@ void RunBenchmark_H2D(ResultDatabase &resultDB) numMaxFloats = sizeToBytes(p_onesize) / sizeof(float); } +#ifndef NO_CHECK + // Check. First reset the host memory, then copy-back result. Then compare against original ref value. for (int i = 0; i < numMaxFloats; i++) { @@ -225,24 +247,36 @@ void RunBenchmark_H2D(ResultDatabase &resultDB) printf ("error: H2D. i=%d reference:%6.f != copyback:%6.2f\n", i, ref, hostMem[i]); } } +#endif // Cleanup hipFree((void*)device); CHECK_HIP_ERROR(); - if (p_pinned) - { + switch (p_malloc_mode) { + case MallocPinned: hipHostFree((void*)hostMem); CHECK_HIP_ERROR(); - } - else - { + break; + + case MallocUnpinned: if (p_alignedhost) { delete[] hostMem; } else { free(hostMem); } + break; + + case MallocRegistered: + hipHostUnregister(hostMem); + CHECK_HIP_ERROR(); + free(hostMem); + break; + default: + assert(0); } + + hipEventDestroy(start); hipEventDestroy(stop); } @@ -257,38 +291,40 @@ void RunBenchmark_D2H(ResultDatabase &resultDB) // Create some host memory pattern float *hostMem1; float *hostMem2; - if (p_pinned) + if (p_malloc_mode == MallocPinned) { hipHostMalloc((void**)&hostMem1, sizeof(float)*numMaxFloats); hipError_t err1 = hipGetLastError(); hipHostMalloc((void**)&hostMem2, sizeof(float)*numMaxFloats); hipError_t err2 = hipGetLastError(); - while (err1 != hipSuccess || err2 != hipSuccess) - { - // free the first buffer if only the second failed - if (err1 == hipSuccess) - hipHostFree((void*)hostMem1); + while (err1 != hipSuccess || err2 != hipSuccess) + { + // free the first buffer if only the second failed + if (err1 == hipSuccess) + hipHostFree((void*)hostMem1); - // drop the size and try again - if (p_verbose) std::cout << " - dropping size allocating pinned mem\n"; - --nSizes; - if (nSizes < 1) - { - std::cerr << "Error: Couldn't allocated any pinned buffer\n"; - return; - } - numMaxFloats = 1024 * (sizes[nSizes-1]) / 4; - hipHostMalloc((void**)&hostMem1, sizeof(float)*numMaxFloats); - err1 = hipGetLastError(); - hipHostMalloc((void**)&hostMem2, sizeof(float)*numMaxFloats); - err2 = hipGetLastError(); - } - } - else + // drop the size and try again + if (p_verbose) std::cout << " - dropping size allocating pinned mem\n"; + --nSizes; + if (nSizes < 1) + { + std::cerr << "Error: Couldn't allocate any pinned buffer\n"; + return; + } + numMaxFloats = 1024 * (sizes[nSizes-1]) / 4; + hipHostMalloc((void**)&hostMem1, sizeof(float)*numMaxFloats); + err1 = hipGetLastError(); + hipHostMalloc((void**)&hostMem2, sizeof(float)*numMaxFloats); + err2 = hipGetLastError(); + } + } + else if (p_malloc_mode == MallocUnpinned) { hostMem1 = new float[numMaxFloats]; hostMem2 = new float[numMaxFloats]; } + + for (int i=0; i Date: Fri, 10 Mar 2017 15:04:46 -0600 Subject: [PATCH 17/23] Refactor registered memory calls. [ROCm/clr commit: b7acb85fa81844b84f407c8524ba1e4bb86dca7c] --- .../include/hip/hcc_detail/hip_runtime_api.h | 11 + .../hipBusBandwidth/hipBusBandwidth.cpp | 52 +++- projects/clr/hipamd/src/hip_memory.cpp | 233 +++++++++--------- 3 files changed, 176 insertions(+), 120 deletions(-) diff --git a/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime_api.h b/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime_api.h index 080f82d9ed..7f85aad28d 100644 --- a/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime_api.h +++ b/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime_api.h @@ -858,6 +858,8 @@ hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attributes, void* ptr) * @param[out] ptr Pointer to the allocated memory * @param[in] size Requested memory size * + * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned. + * * @return #hipSuccess * * @see hipMallocPitch, hipFree, hipMallocArray, hipFreeArray, hipMalloc3D, hipMalloc3DArray, hipHostFree, hipHostMalloc @@ -870,6 +872,8 @@ hipError_t hipMalloc(void** ptr, size_t size) ; * @param[out] ptr Pointer to the allocated host pinned memory * @param[in] size Requested memory size * + * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned. + * * @return #hipSuccess, #hipErrorMemoryAllocation * * @deprecated use hipHostMalloc() instead @@ -883,6 +887,8 @@ hipError_t hipMallocHost(void** ptr, size_t size) __attribute__((deprecated("use * @param[in] size Requested memory size * @param[in] flags Type of host memory allocation * + * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned. + * * @return #hipSuccess, #hipErrorMemoryAllocation * * @see hipSetDeviceFlags, hipHostFree @@ -896,6 +902,8 @@ hipError_t hipHostMalloc(void** ptr, size_t size, unsigned int flags) ; * @param[in] size Requested memory size * @param[in] flags Type of host memory allocation * + * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned. + * * @return #hipSuccess, #hipErrorMemoryAllocation * * @deprecated use hipHostMalloc() instead @@ -980,6 +988,9 @@ hipError_t hipHostUnregister(void* hostPtr) ; * @param[out] pitch Pitch for allocation (in bytes) * @param[in] width Requested pitched allocation width (in bytes) * @param[in] height Requested pitched allocation height + * + * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned. + * * @return Error code * * @see hipMalloc, hipFree, hipMallocArray, hipFreeArray, hipHostFree, hipMalloc3D, hipMalloc3DArray, hipHostMalloc diff --git a/projects/clr/hipamd/samples/1_Utils/hipBusBandwidth/hipBusBandwidth.cpp b/projects/clr/hipamd/samples/1_Utils/hipBusBandwidth/hipBusBandwidth.cpp index a1b2fd1705..09f78543c9 100644 --- a/projects/clr/hipamd/samples/1_Utils/hipBusBandwidth/hipBusBandwidth.cpp +++ b/projects/clr/hipamd/samples/1_Utils/hipBusBandwidth/hipBusBandwidth.cpp @@ -24,7 +24,7 @@ bool p_h2d = true; bool p_d2h = true; bool p_bidir = true; -#define NO_CHECK +//#define NO_CHECK #define CHECK_HIP_ERROR() \ @@ -151,6 +151,10 @@ void RunBenchmark_H2D(ResultDatabase &resultDB) hipHostRegister(hostMem, numMaxFloats * sizeof(float), 0); CHECK_HIP_ERROR(); } + else + { + assert(0); + } for (int i = 0; i < numMaxFloats; i++) { @@ -323,6 +327,22 @@ void RunBenchmark_D2H(ResultDatabase &resultDB) hostMem1 = new float[numMaxFloats]; hostMem2 = new float[numMaxFloats]; } + else if (p_malloc_mode == MallocRegistered) + { + if (p_numa_ctl == -1) { + hostMem1 = (float*)malloc(numMaxFloats*sizeof(float)); + hostMem2 = (float*)malloc(numMaxFloats*sizeof(float)); + } + + hipHostRegister(hostMem1, numMaxFloats * sizeof(float), 0); + CHECK_HIP_ERROR(); + hipHostRegister(hostMem2, numMaxFloats * sizeof(float), 0); + CHECK_HIP_ERROR(); + } + else + { + assert(0); + } for (int i=0; i + + +// Internal HIP APIS: +namespace hip_internal { + +hipError_t memcpyAsync (void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream) +{ + hipError_t e = hipSuccess; + + stream = ihipSyncAndResolveStream(stream); + + + if ((dst == NULL) || (src == NULL)) { + e= hipErrorInvalidValue; + } else if (stream) { + try { + stream->locked_copyAsync(dst, src, sizeBytes, kind); + } + catch (ihipException ex) { + e = ex._code; + } + } else { + e = hipErrorInvalidValue; + } + + return e; +} + +// return 0 on success or -1 on error: +int sharePtr(void *ptr, ihipCtx_t *ctx, unsigned hipFlags) +{ + int ret = 0; + + auto device = ctx->getWriteableDevice(); + + hc::am_memtracker_update(ptr, device->_deviceId, hipFlags); + int peerCnt=0; + { + LockedAccessor_CtxCrit_t crit(ctx->criticalData()); + // the peerCnt always stores self so make sure the trace actually + peerCnt = crit->peerCnt(); + tprintf(DB_MEM, " allow access to %d other peer(s)\n", peerCnt-1); + if (peerCnt > 1) { + + //printf ("peer self access\n"); + + // TODOD - remove me: + for (auto iter = crit->_peers.begin(); iter!=crit->_peers.end(); iter++) { + tprintf (DB_MEM, " allow access to peer: %s%s\n", (*iter)->toString().c_str(), (iter == crit->_peers.begin()) ? " (self)":""); + }; + + hsa_status_t s = hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, ptr); + if (s != HSA_STATUS_SUCCESS) { + ret = -1; + } + } + } + + return ret; +} + + + + +// Allocate a new pointer with am_alloc and share with all valid peers. +// Returns null-ptr if a memory error occurs (either allocation or sharing) +void * allocAndSharePtr(const char *msg, size_t sizeBytes, ihipCtx_t *ctx, unsigned amFlags, unsigned hipFlags) +{ + + void *ptr = nullptr; + + auto device = ctx->getWriteableDevice(); + + ptr = hc::am_alloc(sizeBytes, device->_acc, amFlags); + tprintf(DB_MEM, " alloc %s ptr:%p size:%zu on dev:%d\n", + msg, ptr, sizeBytes, device->_deviceId); + + if (ptr != nullptr) { + int r = sharePtr(ptr, ctx, hipFlags); + if (r != 0) { + ptr = nullptr; + } + } + + return ptr; +} + + +} // end namespace hip_internal + //------------------------------------------------------------------------------------------------- //------------------------------------------------------------------------------------------------- // Memory @@ -128,37 +218,8 @@ hipError_t hipMalloc(void** ptr, size_t sizeBytes) if (ctx) { auto device = ctx->getWriteableDevice(); - const unsigned am_flags = 0; - *ptr = hc::am_alloc(sizeBytes, device->_acc, am_flags); + *ptr = hip_internal::allocAndSharePtr("device_mem", sizeBytes, ctx, 0/*amFlags*/, 0/*hipFlags*/); - - if (sizeBytes && (*ptr == NULL)) { - hip_status = hipErrorMemoryAllocation; - } else { - hc::am_memtracker_update(*ptr, device->_deviceId, 0); - int peerCnt=0; - { - LockedAccessor_CtxCrit_t crit(ctx->criticalData()); - // the peerCnt always stores self so make sure the trace actually - peerCnt = crit->peerCnt(); - tprintf(DB_MEM, " allocated device_mem ptr:%p size:%zu on dev:%d and allow access to %d other peer(s)\n", - *ptr, sizeBytes, device->_deviceId, peerCnt-1); - if (peerCnt > 1) { - - //printf ("peer self access\n"); - - // TODOD - remove me: - for (auto iter = crit->_peers.begin(); iter!=crit->_peers.end(); iter++) { - tprintf (DB_MEM, " allow access to peer: %s%s\n", (*iter)->toString().c_str(), (iter == crit->_peers.begin()) ? " (self)":""); - }; - - hsa_status_t e = hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr); - if (e != HSA_STATUS_SUCCESS) { - hip_status = hipErrorMemoryAllocation; - } - } - } - } } else { hip_status = hipErrorMemoryAllocation; } @@ -198,39 +259,16 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) } else { auto device = ctx->getWriteableDevice(); - if(HIP_COHERENT_HOST_ALLOC){ - // Force to allocate finedgrained system memory - *ptr = hc::am_alloc(sizeBytes, device->_acc, amHostPinned); - if(sizeBytes < 1 && (*ptr == NULL)){ - hip_status = hipErrorMemoryAllocation; - } else { - // TODO - should OR in flags here? - hc::am_memtracker_update(*ptr, device->_deviceId, amHostCoherent); - } - tprintf(DB_MEM, " %s: finegrained system memory ptr=%p\n", __func__, *ptr); - } - else{ - // TODO - am_alloc requires writeable __acc, perhaps could be refactored? - // TODO - hipHostMallocMapped is be ignored on ROCM - all memory is mapped to host address space as WC. - *ptr = hc::am_alloc(sizeBytes, device->_acc, amHostPinned); - if (*ptr == NULL) { - hip_status = hipErrorMemoryAllocation; - } else { - hc::am_memtracker_update(*ptr, device->_deviceId, flags); - // TODO-hipHostMallocPortable should map the host memory into all contexts, regardless of peer status. - int peerCnt=0; - { - LockedAccessor_CtxCrit_t crit(ctx->criticalData()); - peerCnt = crit->peerCnt(); - if (peerCnt > 1) { - hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr); - } - } - tprintf(DB_MEM, "allocated pinned_host ptr:%p size:%zu on dev:%d and allow access to %d other peer(s)\n", *ptr, sizeBytes, device->_deviceId, peerCnt-1); - } - } + unsigned amFlags = HIP_COHERENT_HOST_ALLOC ? amHostCoherent : amHostPinned; + + *ptr = hip_internal::allocAndSharePtr(HIP_COHERENT_HOST_ALLOC ? "finegrained_host":"pinned_host", + sizeBytes, ctx, amFlags, flags); + if(sizeBytes && (*ptr == NULL)){ + hip_status = hipErrorMemoryAllocation; + } } } + if (HIP_SYNC_HOST_ALLOC) { hipDeviceSynchronize(); } @@ -272,22 +310,11 @@ hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height auto device = ctx->getWriteableDevice(); const unsigned am_flags = 0; - *ptr = hc::am_alloc(sizeBytes, device->_acc, am_flags); + *ptr = hip_internal::allocAndSharePtr("device_pitch", sizeBytes, ctx, am_flags, 0); if (sizeBytes && (*ptr == NULL)) { hip_status = hipErrorMemoryAllocation; - } else { - hc::am_memtracker_update(*ptr, device->_deviceId, 0); - { - LockedAccessor_CtxCrit_t crit(ctx->criticalData()); - if (crit->peerCnt() > 1) { // peerCnt includes self so only call allow_access if other peers involved: - hsa_status_t hsa_status = hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr); - if (hsa_status != HSA_STATUS_SUCCESS) { - hip_status = hipErrorMemoryAllocation; - } - } - } - } + } } else { hip_status = hipErrorMemoryAllocation; } @@ -321,41 +348,31 @@ hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc, void ** ptr = &array[0]->data; if (ctx) { - auto device = ctx->getWriteableDevice(); const unsigned am_flags = 0; const size_t size = width*height; + size_t allocSize = 0; switch(desc->f) { case hipChannelFormatKindSigned: - *ptr = hc::am_alloc(size*sizeof(int), device->_acc, am_flags); + allocSize = size * sizeof(int); break; case hipChannelFormatKindUnsigned: - *ptr = hc::am_alloc(size*sizeof(unsigned int), device->_acc, am_flags); + allocSize = size * sizeof(unsigned int); break; case hipChannelFormatKindFloat: - *ptr = hc::am_alloc(size*sizeof(float), device->_acc, am_flags); + allocSize = size * sizeof(float); break; case hipChannelFormatKindNone: - *ptr = hc::am_alloc(size*sizeof(size_t), device->_acc, am_flags); + allocSize = size * sizeof(size_t); break; default: hip_status = hipErrorUnknown; break; } + *ptr = hip_internal::allocAndSharePtr("device_array", allocSize, ctx, am_flags, 0); if (size && (*ptr == NULL)) { hip_status = hipErrorMemoryAllocation; - } else { - hc::am_memtracker_update(*ptr, device->_deviceId, 0); - { - LockedAccessor_CtxCrit_t crit(ctx->criticalData()); - if (crit->peerCnt() > 1) { // peerCnt includes self so only call allow_access if other peers involved: - hsa_status_t hsa_status = hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr); - if (hsa_status != HSA_STATUS_SUCCESS) { - hip_status = hipErrorMemoryAllocation; - } - } - } - } + } } else { hip_status = hipErrorMemoryAllocation; @@ -409,12 +426,13 @@ hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags) hip_status = hipErrorHostMemoryAlreadyRegistered; } else { auto ctx = ihipGetTlsDefaultCtx(); - if(hostPtr == NULL){ + if (hostPtr == NULL) { return ihipLogStatus(hipErrorInvalidValue); } + //TODO-test : multi-gpu access to registered host memory. if (ctx) { - auto device = ctx->getWriteableDevice(); if(flags == hipHostRegisterDefault || flags == hipHostRegisterPortable || flags == hipHostRegisterMapped){ + auto device = ctx->getWriteableDevice(); std::vectorvecAcc; for(int i=0;i_acc); @@ -711,32 +729,6 @@ hipError_t hipMemcpyHtoH(void* dst, void* src, size_t sizeBytes) -// Internal copy sync: -namespace hip_internal { - -hipError_t memcpyAsync (void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream) -{ - hipError_t e = hipSuccess; - - stream = ihipSyncAndResolveStream(stream); - - - if ((dst == NULL) || (src == NULL)) { - e= hipErrorInvalidValue; - } else if (stream) { - try { - stream->locked_copyAsync(dst, src, sizeBytes, kind); - } - catch (ihipException ex) { - e = ex._code; - } - } else { - e = hipErrorInvalidValue; - } - - return e; -} -} // end namespace hip_internal hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream) @@ -1012,6 +1004,7 @@ hipError_t hipMemset(void* dst, int value, size_t sizeBytes ) return ihipLogStatus(e); } + hipError_t hipMemGetInfo (size_t *free, size_t *total) { HIP_INIT_API(free, total); @@ -1067,6 +1060,7 @@ hipError_t hipMemPtrGetInfo(void *ptr, size_t *size) return ihipLogStatus(e); } + hipError_t hipFree(void* ptr) { HIP_INIT_API(ptr); @@ -1094,6 +1088,7 @@ hipError_t hipFree(void* ptr) return ihipLogStatus(hipStatus); } + hipError_t hipHostFree(void* ptr) { HIP_INIT_API(ptr); From 72b420bab473001bb28c6aa0d4ebfd1ae7660b20 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Sun, 12 Mar 2017 09:51:33 -0500 Subject: [PATCH 18/23] Update hiphostregister test. Move check to correct place. [ROCm/clr commit: 9adbbd2980b326a9e55a4021e3252e3ba3fe29d5] --- .../tests/src/runtimeApi/memory/hipHostRegister.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/projects/clr/hipamd/tests/src/runtimeApi/memory/hipHostRegister.cpp b/projects/clr/hipamd/tests/src/runtimeApi/memory/hipHostRegister.cpp index eae73e1a65..1a1319c500 100644 --- a/projects/clr/hipamd/tests/src/runtimeApi/memory/hipHostRegister.cpp +++ b/projects/clr/hipamd/tests/src/runtimeApi/memory/hipHostRegister.cpp @@ -51,24 +51,28 @@ int main(){ HIPCHECK(hipHostGetDevicePointer((void**)&Ad[i], A, 0)); } - // Use device pointer inside a kernel: + // Reference the registered device pointer Ad from inside the kernel: for(int i=0;i Date: Mon, 13 Mar 2017 11:16:05 -0500 Subject: [PATCH 19/23] make sure the inter-thread intrinsics are working post hawaii Change-Id: I30ea5284c2160276f5bc0f937dfd386ca8640ce8 [ROCm/clr commit: af56898ea099bac2c2dc40a71c4b2826f55f33ae] --- projects/clr/hipamd/include/hip/hcc_detail/hip_runtime.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime.h b/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime.h index 6acc604909..332e9bab46 100644 --- a/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime.h +++ b/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime.h @@ -226,7 +226,7 @@ __device__ int __all( int input); __device__ int __any( int input); __device__ unsigned long long int __ballot( int input); -#if __HIP_ARCH_GFX803__ == 1 +#if __HIP_ARCH_GFX701__ == 0 // warp shuffle functions #ifdef __cplusplus From 5cf4c4e440c1b68c7572ee9c6c0233dfa3b05870 Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Tue, 14 Mar 2017 13:51:38 +0530 Subject: [PATCH 20/23] Add gfx900 support Change-Id: I3be2fbdcb6d3fa776c4fe668586c67245a1323f2 [ROCm/clr commit: 1b92ae9917d7ac3fd3245d30a2c1bb8ee5c4dc5c] --- projects/clr/hipamd/CMakeLists.txt | 2 +- projects/clr/hipamd/bin/hipcc | 13 +++++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/projects/clr/hipamd/CMakeLists.txt b/projects/clr/hipamd/CMakeLists.txt index ce0eeb362d..1ba58496f4 100644 --- a/projects/clr/hipamd/CMakeLists.txt +++ b/projects/clr/hipamd/CMakeLists.txt @@ -189,7 +189,7 @@ if(HIP_PLATFORM STREQUAL "hcc") execute_process(COMMAND ${HCC_HOME}/bin/hcc-config --ldflags OUTPUT_VARIABLE HCC_LD_FLAGS) set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${HCC_LD_FLAGS} -Wl,-Bsymbolic") - set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --amdgpu-target=gfx701 --amdgpu-target=gfx801 --amdgpu-target=gfx802 --amdgpu-target=gfx803") + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --amdgpu-target=gfx701 --amdgpu-target=gfx801 --amdgpu-target=gfx802 --amdgpu-target=gfx803 --amdgpu-target=gfx900") add_library(hip_hcc SHARED ${SOURCE_FILES_RUNTIME}) target_link_libraries(hip_hcc PRIVATE hc_am) add_library(hip_hcc_static STATIC ${SOURCE_FILES_RUNTIME}) diff --git a/projects/clr/hipamd/bin/hipcc b/projects/clr/hipamd/bin/hipcc index 7e15d6b2e6..bd6ce9b4e4 100755 --- a/projects/clr/hipamd/bin/hipcc +++ b/projects/clr/hipamd/bin/hipcc @@ -74,6 +74,7 @@ $target_gfx701 = 0; $target_gfx801 = 0; $target_gfx802 = 0; $target_gfx803 = 0; +$target_gfx900 = 0; if ($HIP_PLATFORM eq "hcc") { $HSA_PATH=$ENV{'HSA_PATH'} // "/opt/rocm/hsa"; @@ -261,6 +262,10 @@ foreach $arg (@ARGV) { $target_gfx803 = 1; } + if($arg eq '--amdgpu-target=gfx900') + { + $target_gfx900 = 1; + } if(($trimarg eq '-stdlib=libstdc++') and ($setStdLib eq 0)) { $HIPCXXFLAGS .= $HCC_WA_FLAGS; @@ -343,9 +348,13 @@ if($HIP_PLATFORM eq "hcc"){ $HIPCXXFLAGS .= " -D__HIP_ARCH_GFX803__=1 "; $ENV{HCC_EXTRA_LIBRARIES_GFX803}="$HIP_PATH/lib/hip_hc_gfx803.ll\n"; } - if ($target_gfx701 eq 0 and $target_gfx801 eq 0 and $target_gfx802 eq 0 and $target_gfx803 eq 0) + if ($target_gfx900 eq 1) { + $HIPLDFLAGS .= " --amdgpu-target=gfx900"; + $HIPCXXFLAGS .= " -D__HIP_ARCH_GFX900__=1 "; + } + if ($target_gfx701 eq 0 and $target_gfx801 eq 0 and $target_gfx802 eq 0 and $target_gfx803 eq 0 and $target_gfx900 eq 0) { - $HIPLDFLAGS .= " --amdgpu-target=gfx701 --amdgpu-target=gfx801 --amdgpu-target=gfx802 --amdgpu-target=gfx803"; + $HIPLDFLAGS .= " --amdgpu-target=gfx701 --amdgpu-target=gfx801 --amdgpu-target=gfx802 --amdgpu-target=gfx803 --amdgpu-target=gfx900"; $ENV{HCC_EXTRA_LIBRARIES_GFX803}="$HIP_PATH/lib/hip_hc_gfx803.ll\n"; } From 59f1401f5427533354155875cf001bbee4f5a5d0 Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Tue, 14 Mar 2017 14:25:34 +0530 Subject: [PATCH 21/23] hipcc: Support targets specified via HCC_AMDGPU_TARGET Change-Id: I69fda40d9f666325d377f4b4335e7ee693069214 [ROCm/clr commit: d29a0979053ff52339e6878be8ea4ac4bdf52d08] --- projects/clr/hipamd/bin/hipcc | 84 +++++++++++++++++++++++------------ 1 file changed, 55 insertions(+), 29 deletions(-) diff --git a/projects/clr/hipamd/bin/hipcc b/projects/clr/hipamd/bin/hipcc index bd6ce9b4e4..7179517f7b 100755 --- a/projects/clr/hipamd/bin/hipcc +++ b/projects/clr/hipamd/bin/hipcc @@ -23,8 +23,8 @@ use File::Basename; # HSA_PATH : Path to HSA dir (default /opt/rocm/hsa). Used on AMD platforms only. if(scalar @ARGV == 0){ -print "No Arguments passed, exiting ...\n"; -exit(-1); + print "No Arguments passed, exiting ...\n"; + exit(-1); } #--- @@ -190,18 +190,18 @@ if ($verbose & 0x4) { # Handle code object generation my $ISACMD=""; if($HIP_PLATFORM eq "hcc"){ - $ISACMD .= "set ROCM_PATH=$ROCM_PATH && set ROCM_TARGET=$ROCM_TARGET && $HIP_PATH/bin/hccgenco.sh "; - if($ARGV[0] eq "--genco"){ - foreach $isaarg (@ARGV[1..$#ARGV]){ - $ISACMD .= " "; - $ISACMD .= $isaarg; + $ISACMD .= "set ROCM_PATH=$ROCM_PATH && set ROCM_TARGET=$ROCM_TARGET && $HIP_PATH/bin/hccgenco.sh "; + if($ARGV[0] eq "--genco"){ + foreach $isaarg (@ARGV[1..$#ARGV]){ + $ISACMD .= " "; + $ISACMD .= $isaarg; + } + if ($verbose & 0x1) { + print "hipcc-cmd: ", $ISACMD, "\n"; + } + system($ISACMD) and die(); + exit(0); } - if ($verbose & 0x1) { - print "hipcc-cmd: ", $ISACMD, "\n"; - } - system($ISACMD) and die(); - exit(0); - } } if(($HIP_PLATFORM eq "hcc")){ @@ -211,18 +211,18 @@ if(($HIP_PLATFORM eq "hcc")){ } if($HIP_PLATFORM eq "nvcc"){ - $ISACMD .= "$HIP_PATH/bin/hipcc -ptx "; - if($ARGV[0] eq "--genco"){ - foreach $isaarg (@ARGV[1..$#ARGV]){ - $ISACMD .= " "; - $ISACMD .= $isaarg; + $ISACMD .= "$HIP_PATH/bin/hipcc -ptx "; + if($ARGV[0] eq "--genco"){ + foreach $isaarg (@ARGV[1..$#ARGV]){ + $ISACMD .= " "; + $ISACMD .= $isaarg; + } + if ($verbose & 0x1) { + print "hipcc-cmd: ", $ISACMD, "\n"; + } + system($ISACMD) and die(); + exit(0); } - if ($verbose & 0x1) { - print "hipcc-cmd: ", $ISACMD, "\n"; - } - system($ISACMD) and die(); - exit(0); - } } my $toolArgs = ""; # arguments to pass to the hcc or nvcc tool @@ -248,24 +248,25 @@ foreach $arg (@ARGV) } if($arg eq '--amdgpu-target=gfx701') { - $target_gfx701 = 1; + $target_gfx701 = 1; } if($arg eq '--amdgpu-target=gfx801') { - $target_gfx801 = 1; + $target_gfx801 = 1; } if($arg eq '--amdgpu-target=gfx802') { - $target_gfx802 = 1; + $target_gfx802 = 1; } if($arg eq '--amdgpu-target=gfx803') { - $target_gfx803 = 1; + $target_gfx803 = 1; } if($arg eq '--amdgpu-target=gfx900') { - $target_gfx900 = 1; + $target_gfx900 = 1; } + if(($trimarg eq '-stdlib=libstdc++') and ($setStdLib eq 0)) { $HIPCXXFLAGS .= $HCC_WA_FLAGS; @@ -325,6 +326,29 @@ foreach $arg (@ARGV) } $toolArgs .= " $arg" unless $swallowArg; } +foreach my $target (split(/,/, $ENV{HCC_AMDGPU_TARGET})) +{ + if($target eq 'gfx701') + { + $target_gfx701 = 1; + } + if($target eq 'gfx801') + { + $target_gfx801 = 1; + } + if($target eq 'gfx802') + { + $target_gfx802 = 1; + } + if($target eq 'gfx803') + { + $target_gfx803 = 1; + } + if($target eq 'gfx900') + { + $target_gfx900 = 1; + } +} if($HIP_PLATFORM eq "hcc"){ @@ -416,3 +440,5 @@ if ($runCmd) { } system ("$CMD") and die (); } + +# vim: ts=4:sw=4:expandtab:smartindent From f400aa48f53774acc007c6b9a1426e1d2faf883d Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Tue, 14 Mar 2017 14:34:25 +0530 Subject: [PATCH 22/23] default to gfx803 instead of fatbin if no arch specified Change-Id: I83d56c6ede11c356d383b09d7eb3a5f08c8d8c84 [ROCm/clr commit: f32980847fb29edf8fcea31fb52894b035a67591] --- projects/clr/hipamd/bin/hipcc | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/projects/clr/hipamd/bin/hipcc b/projects/clr/hipamd/bin/hipcc index 7179517f7b..d2822fd0da 100755 --- a/projects/clr/hipamd/bin/hipcc +++ b/projects/clr/hipamd/bin/hipcc @@ -349,6 +349,10 @@ foreach my $target (split(/,/, $ENV{HCC_AMDGPU_TARGET})) $target_gfx900 = 1; } } +if ($target_gfx701 eq 0 and $target_gfx801 eq 0 and $target_gfx802 eq 0 and $target_gfx803 eq 0 and $target_gfx900 eq 0) +{ + $target_gfx803 = 1; +} if($HIP_PLATFORM eq "hcc"){ @@ -376,12 +380,6 @@ if($HIP_PLATFORM eq "hcc"){ $HIPLDFLAGS .= " --amdgpu-target=gfx900"; $HIPCXXFLAGS .= " -D__HIP_ARCH_GFX900__=1 "; } - if ($target_gfx701 eq 0 and $target_gfx801 eq 0 and $target_gfx802 eq 0 and $target_gfx803 eq 0 and $target_gfx900 eq 0) - { - $HIPLDFLAGS .= " --amdgpu-target=gfx701 --amdgpu-target=gfx801 --amdgpu-target=gfx802 --amdgpu-target=gfx803 --amdgpu-target=gfx900"; - $ENV{HCC_EXTRA_LIBRARIES_GFX803}="$HIP_PATH/lib/hip_hc_gfx803.ll\n"; - } - } if ($hasC and $HIP_PLATFORM eq 'nvcc') { From ee45f273fd9448dc87ec7e65cb7bf78e01ddbfbe Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Tue, 14 Mar 2017 15:56:18 +0530 Subject: [PATCH 23/23] 4_shfl and 5_2dshfl samples are unsupported on gfx701 Change-Id: I81eb880350f25e89573ba14c62b549c6c43f8c91 [ROCm/clr commit: f91583b2949bfd4299c29d8b641f4b419226f66d] --- projects/clr/hipamd/samples/2_Cookbook/4_shfl/Makefile | 6 +++++- projects/clr/hipamd/samples/2_Cookbook/5_2dshfl/Makefile | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/projects/clr/hipamd/samples/2_Cookbook/4_shfl/Makefile b/projects/clr/hipamd/samples/2_Cookbook/4_shfl/Makefile index 21c0e93959..56f54d9518 100644 --- a/projects/clr/hipamd/samples/2_Cookbook/4_shfl/Makefile +++ b/projects/clr/hipamd/samples/2_Cookbook/4_shfl/Makefile @@ -3,6 +3,10 @@ ifeq (,$(HIP_PATH)) HIP_PATH=../../.. endif +ifeq (gfx701, $(findstring gfx701,$(HCC_AMDGPU_TARGET))) + $(error gfx701 is not a supported device for this sample) +endif + HIPCC=$(HIP_PATH)/bin/hipcc TARGET=hcc @@ -22,7 +26,7 @@ CXX=$(HIPCC) $(EXECUTABLE): $(OBJECTS) - $(HIPCC) --amdgpu-target=gfx803 $(OBJECTS) -o $@ + $(HIPCC) $(OBJECTS) -o $@ test: $(EXECUTABLE) diff --git a/projects/clr/hipamd/samples/2_Cookbook/5_2dshfl/Makefile b/projects/clr/hipamd/samples/2_Cookbook/5_2dshfl/Makefile index 6abaf658b1..cfadb1a311 100644 --- a/projects/clr/hipamd/samples/2_Cookbook/5_2dshfl/Makefile +++ b/projects/clr/hipamd/samples/2_Cookbook/5_2dshfl/Makefile @@ -3,6 +3,10 @@ ifeq (,$(HIP_PATH)) HIP_PATH=../../.. endif +ifeq (gfx701, $(findstring gfx701,$(HCC_AMDGPU_TARGET))) + $(error gfx701 is not a supported device for this sample) +endif + HIPCC=$(HIP_PATH)/bin/hipcc TARGET=hcc @@ -22,7 +26,7 @@ CXX=$(HIPCC) $(EXECUTABLE): $(OBJECTS) - $(HIPCC) --amdgpu-target=gfx803 $(OBJECTS) -o $@ + $(HIPCC) $(OBJECTS) -o $@ test: $(EXECUTABLE)