diff --git a/include/hip/hcc_detail/hip_runtime.h b/include/hip/hcc_detail/hip_runtime.h index 379fc05f5b..370ac2abbb 100644 --- a/include/hip/hcc_detail/hip_runtime.h +++ b/include/hip/hcc_detail/hip_runtime.h @@ -50,10 +50,16 @@ THE SOFTWARE. #include +// define HIP_ENABLE_PRINTF to enable printf +#ifdef HIP_ENABLE_PRINTF + #define HCC_ENABLE_ACCELERATOR_PRINTF 1 +#endif + //--- // Remainder of this file only compiles with HCC #if defined __HCC__ #include +#include "hc_printf.hpp" //TODO-HCC-GL - change this to typedef. //typedef grid_launch_parm hipLaunchParm ; @@ -108,13 +114,12 @@ extern int HIP_TRACE_API; #if defined(__HCC_ACCELERATOR__) && (__HCC_ACCELERATOR__ != 0) // Device compile and not host compile: -//TODO-HCC enable __HIP_ARCH_HAS_ATOMICS__ when HCC supports these. // 32-bit Atomics: #define __HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS__ (1) #define __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ (1) #define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (1) #define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (1) -#define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (0) +#define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (1) // 64-bit Atomics: #define __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__ (1) @@ -420,6 +425,20 @@ static inline __device__ void* memset(void* ptr, int val, size_t size) } +#ifdef __HCC_ACCELERATOR__ + +#ifdef HC_FEATURE_PRINTF +template +static inline __device__ void printf(const char* format, All... all) { + hc::printf(format, all...); +} +#else +template +static inline __device__ void printf(const char* format, All... all) { } +#endif + +#endif + #define __syncthreads() hc_barrier(CLK_LOCAL_MEM_FENCE) diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index 2d67c31fe7..915732bdce 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -616,7 +616,7 @@ void ihipDevice_t::locked_reset() //FIXME - Calling am_memtracker_reset is really bad since it destroyed all buffers allocated by the HCC runtime as well //such as the printf buffer. Re-initialze the printf buffer as a workaround for now. -#if (__hcc_workweek__ >= 17423) +#ifdef HC_FEATURE_PRINTF Kalmar::getContext()->initPrintfBuffer(); #endif }; @@ -700,26 +700,25 @@ int checkAccess(hsa_agent_t agent, hsa_amd_memory_pool_t pool) return access; } -hsa_status_t get_region_info(hsa_region_t region, void* data) +hsa_status_t get_pool_info(hsa_amd_memory_pool_t pool, void* data) { hsa_status_t err; hipDeviceProp_t* p_prop = reinterpret_cast(data); uint32_t region_segment; - // Get region segment - err = hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, ®ion_segment); + // Get pool segment + err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, ®ion_segment); ErrorCheck(err); switch(region_segment) { case HSA_REGION_SEGMENT_READONLY: - err = hsa_region_get_info(region, HSA_REGION_INFO_SIZE, &(p_prop->totalConstMem)); break; - /* case HSA_REGION_SEGMENT_PRIVATE: - cout<<"PRIVATE"<totalConstMem)); break; case HSA_REGION_SEGMENT_GROUP: - err = hsa_region_get_info(region, HSA_REGION_INFO_SIZE, &(p_prop->sharedMemPerBlock)); break; + err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, &(p_prop->sharedMemPerBlock)); + break; default: break; } - return HSA_STATUS_SUCCESS; + return err; } @@ -750,11 +749,7 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop) hipError_t e = hipSuccess; hsa_status_t err; - // Set some defaults in case we don't find the appropriate regions: - prop->totalGlobalMem = 0; - prop->totalConstMem = 0; - prop-> maxThreadsPerMultiProcessor = 0; - prop->regsPerBlock = 0; + memset(prop, 0, sizeof(hipDeviceProp_t)); if (_hsaAgent.handle == -1) { return hipErrorInvalidDevice; @@ -854,15 +849,18 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop) prop-> maxThreadsPerMultiProcessor = prop->warpSize*max_waves_per_cu; // Get memory properties - err = hsa_agent_iterate_regions(_hsaAgent, get_region_info, prop); + err = hsa_amd_agent_iterate_memory_pools(_hsaAgent, get_pool_info, prop); + if (err == HSA_STATUS_INFO_BREAK) { + err = HSA_STATUS_SUCCESS; + } DeviceErrorCheck(err); - // Get the size of the region we are using for Accelerator Memory allocations: + // Get the size of the pool we are using for Accelerator Memory allocations: hsa_region_t *am_region = static_cast(_acc.get_hsa_am_region()); err = hsa_region_get_info(*am_region, HSA_REGION_INFO_SIZE, &prop->totalGlobalMem); DeviceErrorCheck(err); // maxSharedMemoryPerMultiProcessor should be as the same as group memory size. - // Group memory will not be paged out, so, the physical memory size is the total shared memory size, and also equal to the group region size. + // Group memory will not be paged out, so, the physical memory size is the total shared memory size, and also equal to the group pool size. prop->maxSharedMemoryPerMultiProcessor = prop->totalGlobalMem; // Get Max memory clock frequency @@ -882,7 +880,7 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop) prop->arch.hasGlobalFloatAtomicExch = 1; prop->arch.hasSharedInt32Atomics = 1; prop->arch.hasSharedFloatAtomicExch = 1; - prop->arch.hasFloatAtomicAdd = 0; + prop->arch.hasFloatAtomicAdd = 1; // supported with CAS loop, but is supported prop->arch.hasGlobalInt64Atomics = 1; prop->arch.hasSharedInt64Atomics = 1; prop->arch.hasDoubles = 1; @@ -890,7 +888,7 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop) prop->arch.hasWarpBallot = 1; prop->arch.hasWarpShuffle = 1; prop->arch.hasFunnelShift = 0; // TODO-hcc - prop->arch.hasThreadFenceSystem = 0; // TODO-hcc + prop->arch.hasThreadFenceSystem = 1; prop->arch.hasSyncThreadsExt = 0; // TODO-hcc prop->arch.hasSurfaceFuncs = 0; // TODO-hcc prop->arch.has3dGrid = 1; diff --git a/tests/src/kernel/hipPrintfKernel.cpp b/tests/src/kernel/hipPrintfKernel.cpp new file mode 100644 index 0000000000..482098fd54 --- /dev/null +++ b/tests/src/kernel/hipPrintfKernel.cpp @@ -0,0 +1,38 @@ +/* +Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* HIT_START + * BUILD: %t %s ../test_common.cpp + * RUN: %t + * HIT_END + */ + +#define HIP_ENABLE_PRINTF + +#include"test_common.h" + +__global__ void run_printf(hipLaunchParm lp){ + printf("Hello World\n"); +} + +int main(){ +hipLaunchKernel(HIP_KERNEL_NAME(run_printf), dim3(1), dim3(1), 0, 0); +hipDeviceSynchronize(); +passed(); +}