diff --git a/projects/clr/hipamd/docs/markdown/hip_kernel_language.md b/projects/clr/hipamd/docs/markdown/hip_kernel_language.md index 478de7cda6..ec2530e46e 100644 --- a/projects/clr/hipamd/docs/markdown/hip_kernel_language.md +++ b/projects/clr/hipamd/docs/markdown/hip_kernel_language.md @@ -230,6 +230,9 @@ typedef struct dim3 { ## Memory-Fence Instructions HIP support for __threadfence(), __threadfence_block() and __threadfence_system() is under development. +The stubs for the threadfence routines are defined in hcc_details/hip_runtime.h. +Applications that use these threadfence features should disable both of the L1 and L2 caches by: +"export HSA_DISABLE_CACHE=1" ## Synchronization Functions The __syncthreads() built-in function is supported in HIP. The __syncthreads_count(int), __syncthreads_and(int) and __syncthreads_or(int) functions are under development. diff --git a/projects/clr/hipamd/include/hcc_detail/hip_runtime.h b/projects/clr/hipamd/include/hcc_detail/hip_runtime.h index 22095b342d..c29995ba2a 100644 --- a/projects/clr/hipamd/include/hcc_detail/hip_runtime.h +++ b/projects/clr/hipamd/include/hcc_detail/hip_runtime.h @@ -48,7 +48,7 @@ THE SOFTWARE. #ifdef __HCC__ #include -#if defined (GRID_LAUNCH_VERSION) and (GRID_LAUNCH_VERSION >= 20) +#if defined (GRID_LAUNCH_VERSION) and (GRID_LAUNCH_VERSION >= 20) // Use field names for grid_launch 2.0 structure, if HCC supports GL 2.0. #define USE_GRID_LAUNCH_20 1 #else @@ -496,6 +496,59 @@ __device__ float __dsqrt_rz(double x); * Kernel launching */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Memory Fence Functions + * @{ + * + * + * @warning The HIP memory fence functions are currently not supported yet. + * If any of those threadfence stubs are reached by the application, you should set "export HSA_DISABLE_CACHE=1" to disable L1 and L2 caches. + * + * + * On AMD platforms, the threadfence* routines are currently empty stubs. + */ + + /** + * @brief threadfence_block makes writes visible to threads running in same block. + * + * @Returns void + * + * @param void + * + * @warning __threadfence_block is a stub and map to no-op. + */ +__device__ void __threadfence_block(void); + + /** + * @brief threadfence makes wirtes visible to other threads running on same GPU. + * + * @Returns void + * + * @param void + * + * @warning __threadfence is a stub and map to no-op, application should set "export HSA_DISABLE_CACHE=1" to disable both L1 and L2 caches. + */ +__device__ void __threadfence(void); + +/** + * @brief threadfence_system makes writes to pinned system memory visible on host CPU. + * + * @Returns void + * + * @param void + * + * @warning __threadfence_system is a stub and map to no-op, application should set "export HSA_DISABLE_CACHE=1" to disable both L1 and L2 caches. + */ +__device__ void __threadfence_system(void); + + +// doxygen end Memory Fence +/** + * @} + */ + #define hipThreadIdx_x (hc_get_workitem_id(0)) #define hipThreadIdx_y (hc_get_workitem_id(1)) diff --git a/projects/clr/hipamd/src/hip_memory.cpp b/projects/clr/hipamd/src/hip_memory.cpp index d458256fdf..94442f4698 100644 --- a/projects/clr/hipamd/src/hip_memory.cpp +++ b/projects/clr/hipamd/src/hip_memory.cpp @@ -295,7 +295,7 @@ hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc, if (crit->peerCnt() > 1) { // peerCnt includes self so only call allow_access if other peers involved: hsa_status_t hsa_status = hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr); if (hsa_status != HSA_STATUS_SUCCESS) { - hip_status = hipErrorMemoryAllocation; + hip_status = hipErrorMemoryAllocation; } } } @@ -443,7 +443,7 @@ hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind /** - * @result #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidMemcpyDirection, + * @result #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidMemcpyDirection, * @result #hipErrorInvalidValue : If dst==NULL or src==NULL, or other bad argument. * @warning on HCC hipMemcpyAsync does not support overlapped H2D and D2H copies. * @warning on HCC hipMemcpyAsync requires that any host pointers are pinned (ie via the hipMallocHost call). @@ -703,7 +703,7 @@ hipError_t hipMemGetInfo (size_t *free, size_t *total) size_t deviceMemSize, hostMemSize, userMemSize; hc::am_memtracker_sizeinfo(hipDevice->_acc, &deviceMemSize, &hostMemSize, &userMemSize); printf ("deviceMemSize=%zu\n", deviceMemSize); - + *free = hipDevice->_props.totalGlobalMem - deviceMemSize; } @@ -737,7 +737,7 @@ hipError_t hipFree(void* ptr) } } else { // free NULL pointer succeeds and is common technique to initialize runtime - hipStatus = hipSuccess; + hipStatus = hipSuccess; } return ihipLogStatus(hipStatus); @@ -765,7 +765,7 @@ hipError_t hipHostFree(void* ptr) } } else { // free NULL pointer succeeds and is common technique to initialize runtime - hipStatus = hipSuccess; + hipStatus = hipSuccess; } return ihipLogStatus(hipStatus); @@ -802,4 +802,16 @@ hipError_t hipFreeArray(hipArray* array) return ihipLogStatus(hipStatus); } +// Stubs of threadfence operations +__device__ void __threadfence_block(void){ + // no-op +} + +__device__ void __threadfence(void){ + // no-op +} + +__device__ void __threadfence_system(void){ + // no-op +}