From c25b9c6786f98d8b4f6696283f66d1a33c34836a Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Thu, 20 Jul 2017 14:41:30 +0530 Subject: [PATCH 1/5] Renable frexp(f) device math function Change-Id: I53c022b8ddf38cd17ddb42eba457b9020db66395 --- include/hip/hcc_detail/math_functions.h | 4 ++-- tests/src/deviceLib/hipDoublePrecisionMathDevice.cpp | 2 +- tests/src/deviceLib/hipSinglePrecisionMathDevice.cpp | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/hip/hcc_detail/math_functions.h b/include/hip/hcc_detail/math_functions.h index 9faff2743a..79bacf274b 100644 --- a/include/hip/hcc_detail/math_functions.h +++ b/include/hip/hcc_detail/math_functions.h @@ -60,7 +60,7 @@ __device__ float fmaf(float x, float y, float z); __device__ float fmaxf(float x, float y); __device__ float fminf(float x, float y); __device__ float fmodf(float x, float y); -//__device__ float frexpf(float x, int* nptr); +__device__ float frexpf(float x, int* nptr); __device__ float hypotf(float x, float y); __device__ float ilogbf(float x); __device__ int isfinite(float a); @@ -146,7 +146,7 @@ __device__ double fma(double x, double y, double z); __device__ double fmax(double x, double y); __device__ double fmin(double x, double y); __device__ double fmod(double x, double y); -//__device__ double frexp(double x, int *nptr); +__device__ double frexp(double x, int *nptr); __device__ double hypot(double x, double y); __device__ double ilogb(double x); __device__ int isfinite(double x); diff --git a/tests/src/deviceLib/hipDoublePrecisionMathDevice.cpp b/tests/src/deviceLib/hipDoublePrecisionMathDevice.cpp index f4f7ab0479..0a81d111c2 100644 --- a/tests/src/deviceLib/hipDoublePrecisionMathDevice.cpp +++ b/tests/src/deviceLib/hipDoublePrecisionMathDevice.cpp @@ -69,7 +69,7 @@ __device__ void double_precision_math_functions() fmax(0.0, 0.0); fmin(0.0, 0.0); fmod(0.0, 1.0); -// frexp(0.0, &iX); + frexp(0.0, &iX); hypot(1.0, 0.0); ilogb(1.0); isfinite(0.0); diff --git a/tests/src/deviceLib/hipSinglePrecisionMathDevice.cpp b/tests/src/deviceLib/hipSinglePrecisionMathDevice.cpp index de3dec35ef..a52b1a22fe 100644 --- a/tests/src/deviceLib/hipSinglePrecisionMathDevice.cpp +++ b/tests/src/deviceLib/hipSinglePrecisionMathDevice.cpp @@ -70,7 +70,7 @@ __device__ void single_precision_math_functions() fmaxf(0.0f, 0.0f); fminf(0.0f, 0.0f); fmodf(0.0f, 1.0f); - //frexpf(0.0f, &iX); + frexpf(0.0f, &iX); hypotf(1.0f, 0.0f); ilogbf(1.0f); isfinite(0.0f); From a3bc662c060ce96236b633fff6a2b906abaaadb5 Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Fri, 21 Jul 2017 15:50:12 -0500 Subject: [PATCH 2/5] fixed device selection during compilation to use rocm_agent_enumerator 1. Changed hipcc to use rocm_agent_enumerator 2. Changed square sample test to use device variable --- bin/hipcc | 22 ++++++++++++++++++++++ samples/0_Intro/square/square.hipref.cpp | 5 +++-- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/bin/hipcc b/bin/hipcc index bcd3e3a591..2dceaa295f 100755 --- a/bin/hipcc +++ b/bin/hipcc @@ -103,6 +103,28 @@ if ($HIP_PLATFORM eq "hcc") { $HIPLDFLAGS = `${HCC_HOME}/bin/hcc-config --ldflags`; + $ROCM_AGENT_ENUM = "${ROCM_PATH}/bin/rocm_agent_enumerator"; + + my $myAgents = `${ROCM_AGENT_ENUM} -t GPU`; + my @agentsLine = split('\n', $myAgents); + + foreach my $val (@agentsLine) { + if($val eq "gfx701") { + $target_gfx701 = 1; + } + if($val eq "gfx801") { + $target_gfx801 = 1; + } + if($val eq "gfx802") { + $target_gfx802 = 1; + } + if($val eq "gfx803") { + $target_gfx803 = 1; + } + if($val eq "gfx900") { + $target_gfx900 = 1; + } + } #### GCC system includes workaround #### $HCC_WA_FLAGS = " "; diff --git a/samples/0_Intro/square/square.hipref.cpp b/samples/0_Intro/square/square.hipref.cpp index e694bfb8a4..167cb135f4 100644 --- a/samples/0_Intro/square/square.hipref.cpp +++ b/samples/0_Intro/square/square.hipref.cpp @@ -54,9 +54,10 @@ int main(int argc, char *argv[]) float *A_h, *C_h; size_t N = 1000000; size_t Nbytes = N * sizeof(float); - + static int device = 0; + CHECK(hipSetDevice(device)); hipDeviceProp_t props; - CHECK(hipGetDeviceProperties(&props, 0/*deviceID*/)); + CHECK(hipGetDeviceProperties(&props, device/*deviceID*/)); printf ("info: running on device %s\n", props.name); #ifdef __HIP_PLATFORM_HCC__ printf ("info: architecture on AMD GPU device is: %d\n",props.gcnArch); From c9f906c2cea47ed311085c280ac988bec13b8140 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Fri, 20 Oct 2017 14:49:29 +0000 Subject: [PATCH 3/5] Modify device properties to use pool API. - Also better error code checking --- src/hip_hcc.cpp | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index 2c98ac804f..aa8dbd0072 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -695,26 +695,26 @@ int checkAccess(hsa_agent_t agent, hsa_amd_memory_pool_t pool) return access; } -hsa_status_t get_region_info(hsa_region_t region, void* data) +hsa_status_t get_pool_info(hsa_amd_memory_pool_t pool, void* data) { hsa_status_t err; hipDeviceProp_t* p_prop = reinterpret_cast(data); uint32_t region_segment; - // Get region segment - err = hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, ®ion_segment); + // Get pool segment + err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, ®ion_segment); ErrorCheck(err); switch(region_segment) { case HSA_REGION_SEGMENT_READONLY: - err = hsa_region_get_info(region, HSA_REGION_INFO_SIZE, &(p_prop->totalConstMem)); break; - /* case HSA_REGION_SEGMENT_PRIVATE: - cout<<"PRIVATE"<totalConstMem)); break; case HSA_REGION_SEGMENT_GROUP: - err = hsa_region_get_info(region, HSA_REGION_INFO_SIZE, &(p_prop->sharedMemPerBlock)); break; + err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, &(p_prop->sharedMemPerBlock)); + printf ("shared_mem err=%d mem=%zu\n", err, p_prop->sharedMemPerBlock); + break; default: break; } - return HSA_STATUS_SUCCESS; + return err; } @@ -748,8 +748,10 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop) // Set some defaults in case we don't find the appropriate regions: prop->totalGlobalMem = 0; prop->totalConstMem = 0; - prop-> maxThreadsPerMultiProcessor = 0; + prop->maxThreadsPerMultiProcessor = 0; prop->regsPerBlock = 0; + prop->totalConstMem = 0; + prop->sharedMemPerBlock = 0; if (_hsaAgent.handle == -1) { return hipErrorInvalidDevice; @@ -849,15 +851,18 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop) prop-> maxThreadsPerMultiProcessor = prop->warpSize*max_waves_per_cu; // Get memory properties - err = hsa_agent_iterate_regions(_hsaAgent, get_region_info, prop); + err = hsa_amd_agent_iterate_memory_pools(_hsaAgent, get_pool_info, prop); + if (err == HSA_STATUS_INFO_BREAK) { + err = HSA_STATUS_SUCCESS; + } DeviceErrorCheck(err); - // Get the size of the region we are using for Accelerator Memory allocations: + // Get the size of the pool we are using for Accelerator Memory allocations: hsa_region_t *am_region = static_cast(_acc.get_hsa_am_region()); err = hsa_region_get_info(*am_region, HSA_REGION_INFO_SIZE, &prop->totalGlobalMem); DeviceErrorCheck(err); // maxSharedMemoryPerMultiProcessor should be as the same as group memory size. - // Group memory will not be paged out, so, the physical memory size is the total shared memory size, and also equal to the group region size. + // Group memory will not be paged out, so, the physical memory size is the total shared memory size, and also equal to the group pool size. prop->maxSharedMemoryPerMultiProcessor = prop->totalGlobalMem; // Get Max memory clock frequency From acf89b43d4a265e197f030de13dad64c977eaa54 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Fri, 20 Oct 2017 15:50:18 +0000 Subject: [PATCH 4/5] Update device properties. - clear properties to defined initial state. - enable some property flags which are now supported. --- include/hip/hcc_detail/hip_runtime.h | 3 +-- src/hip_hcc.cpp | 12 +++--------- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/include/hip/hcc_detail/hip_runtime.h b/include/hip/hcc_detail/hip_runtime.h index 379fc05f5b..9fcc02b2d7 100644 --- a/include/hip/hcc_detail/hip_runtime.h +++ b/include/hip/hcc_detail/hip_runtime.h @@ -108,13 +108,12 @@ extern int HIP_TRACE_API; #if defined(__HCC_ACCELERATOR__) && (__HCC_ACCELERATOR__ != 0) // Device compile and not host compile: -//TODO-HCC enable __HIP_ARCH_HAS_ATOMICS__ when HCC supports these. // 32-bit Atomics: #define __HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS__ (1) #define __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ (1) #define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (1) #define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (1) -#define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (0) +#define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (1) // 64-bit Atomics: #define __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__ (1) diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index aa8dbd0072..d2d28c2a93 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -745,13 +745,7 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop) hipError_t e = hipSuccess; hsa_status_t err; - // Set some defaults in case we don't find the appropriate regions: - prop->totalGlobalMem = 0; - prop->totalConstMem = 0; - prop->maxThreadsPerMultiProcessor = 0; - prop->regsPerBlock = 0; - prop->totalConstMem = 0; - prop->sharedMemPerBlock = 0; + memset(prop, 0, sizeof(hipDeviceProp_t)); if (_hsaAgent.handle == -1) { return hipErrorInvalidDevice; @@ -882,7 +876,7 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop) prop->arch.hasGlobalFloatAtomicExch = 1; prop->arch.hasSharedInt32Atomics = 1; prop->arch.hasSharedFloatAtomicExch = 1; - prop->arch.hasFloatAtomicAdd = 0; + prop->arch.hasFloatAtomicAdd = 1; // supported with CAS loop, but is supported prop->arch.hasGlobalInt64Atomics = 1; prop->arch.hasSharedInt64Atomics = 1; prop->arch.hasDoubles = 1; @@ -890,7 +884,7 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop) prop->arch.hasWarpBallot = 1; prop->arch.hasWarpShuffle = 1; prop->arch.hasFunnelShift = 0; // TODO-hcc - prop->arch.hasThreadFenceSystem = 0; // TODO-hcc + prop->arch.hasThreadFenceSystem = 1; prop->arch.hasSyncThreadsExt = 0; // TODO-hcc prop->arch.hasSurfaceFuncs = 0; // TODO-hcc prop->arch.has3dGrid = 1; From dd24983571b9bbe63986a01d6409775eaf15924f Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Fri, 20 Oct 2017 13:24:04 -0700 Subject: [PATCH 5/5] Remove printf --- src/hip_hcc.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index d2d28c2a93..6a614b1826 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -710,7 +710,6 @@ hsa_status_t get_pool_info(hsa_amd_memory_pool_t pool, void* data) err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, &(p_prop->totalConstMem)); break; case HSA_REGION_SEGMENT_GROUP: err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, &(p_prop->sharedMemPerBlock)); - printf ("shared_mem err=%d mem=%zu\n", err, p_prop->sharedMemPerBlock); break; default: break; }