Add empty stubs for threadfence family routines, changes include:
- stubs and documentation in include/hcc_details/hip_runtime.h
- stubs with "no-op" in src/hip_memory.cpp
- document update in hip_kernel_language.md, add suggestions to
disable L1 and L2 caches when using the threadfence routines.
Change-Id: Ic0753170f802003055bca9d7476d7f48817b98b7
[ROCm/clr commit: 6db08e5135]
Этот коммит содержится в:
@@ -230,6 +230,9 @@ typedef struct dim3 {
|
||||
|
||||
## Memory-Fence Instructions
|
||||
HIP support for __threadfence(), __threadfence_block() and __threadfence_system() is under development.
|
||||
The stubs for the threadfence routines are defined in hcc_details/hip_runtime.h.
|
||||
Applications that use these threadfence features should disable both of the L1 and L2 caches by:
|
||||
"export HSA_DISABLE_CACHE=1"
|
||||
|
||||
## Synchronization Functions
|
||||
The __syncthreads() built-in function is supported in HIP. The __syncthreads_count(int), __syncthreads_and(int) and __syncthreads_or(int) functions are under development.
|
||||
|
||||
@@ -48,7 +48,7 @@ THE SOFTWARE.
|
||||
#ifdef __HCC__
|
||||
#include <grid_launch.h>
|
||||
|
||||
#if defined (GRID_LAUNCH_VERSION) and (GRID_LAUNCH_VERSION >= 20)
|
||||
#if defined (GRID_LAUNCH_VERSION) and (GRID_LAUNCH_VERSION >= 20)
|
||||
// Use field names for grid_launch 2.0 structure, if HCC supports GL 2.0.
|
||||
#define USE_GRID_LAUNCH_20 1
|
||||
#else
|
||||
@@ -496,6 +496,59 @@ __device__ float __dsqrt_rz(double x);
|
||||
* Kernel launching
|
||||
*/
|
||||
|
||||
/**
|
||||
*-------------------------------------------------------------------------------------------------
|
||||
*-------------------------------------------------------------------------------------------------
|
||||
* @defgroup Memory Fence Functions
|
||||
* @{
|
||||
*
|
||||
*
|
||||
* @warning The HIP memory fence functions are currently not supported yet.
|
||||
* If any of those threadfence stubs are reached by the application, you should set "export HSA_DISABLE_CACHE=1" to disable L1 and L2 caches.
|
||||
*
|
||||
*
|
||||
* On AMD platforms, the threadfence* routines are currently empty stubs.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief threadfence_block makes writes visible to threads running in same block.
|
||||
*
|
||||
* @Returns void
|
||||
*
|
||||
* @param void
|
||||
*
|
||||
* @warning __threadfence_block is a stub and map to no-op.
|
||||
*/
|
||||
__device__ void __threadfence_block(void);
|
||||
|
||||
/**
|
||||
* @brief threadfence makes wirtes visible to other threads running on same GPU.
|
||||
*
|
||||
* @Returns void
|
||||
*
|
||||
* @param void
|
||||
*
|
||||
* @warning __threadfence is a stub and map to no-op, application should set "export HSA_DISABLE_CACHE=1" to disable both L1 and L2 caches.
|
||||
*/
|
||||
__device__ void __threadfence(void);
|
||||
|
||||
/**
|
||||
* @brief threadfence_system makes writes to pinned system memory visible on host CPU.
|
||||
*
|
||||
* @Returns void
|
||||
*
|
||||
* @param void
|
||||
*
|
||||
* @warning __threadfence_system is a stub and map to no-op, application should set "export HSA_DISABLE_CACHE=1" to disable both L1 and L2 caches.
|
||||
*/
|
||||
__device__ void __threadfence_system(void);
|
||||
|
||||
|
||||
// doxygen end Memory Fence
|
||||
/**
|
||||
* @}
|
||||
*/
|
||||
|
||||
|
||||
#define hipThreadIdx_x (hc_get_workitem_id(0))
|
||||
#define hipThreadIdx_y (hc_get_workitem_id(1))
|
||||
|
||||
@@ -295,7 +295,7 @@ hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc,
|
||||
if (crit->peerCnt() > 1) { // peerCnt includes self so only call allow_access if other peers involved:
|
||||
hsa_status_t hsa_status = hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr);
|
||||
if (hsa_status != HSA_STATUS_SUCCESS) {
|
||||
hip_status = hipErrorMemoryAllocation;
|
||||
hip_status = hipErrorMemoryAllocation;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -443,7 +443,7 @@ hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind
|
||||
|
||||
|
||||
/**
|
||||
* @result #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidMemcpyDirection,
|
||||
* @result #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidMemcpyDirection,
|
||||
* @result #hipErrorInvalidValue : If dst==NULL or src==NULL, or other bad argument.
|
||||
* @warning on HCC hipMemcpyAsync does not support overlapped H2D and D2H copies.
|
||||
* @warning on HCC hipMemcpyAsync requires that any host pointers are pinned (ie via the hipMallocHost call).
|
||||
@@ -703,7 +703,7 @@ hipError_t hipMemGetInfo (size_t *free, size_t *total)
|
||||
size_t deviceMemSize, hostMemSize, userMemSize;
|
||||
hc::am_memtracker_sizeinfo(hipDevice->_acc, &deviceMemSize, &hostMemSize, &userMemSize);
|
||||
printf ("deviceMemSize=%zu\n", deviceMemSize);
|
||||
|
||||
|
||||
*free = hipDevice->_props.totalGlobalMem - deviceMemSize;
|
||||
}
|
||||
|
||||
@@ -737,7 +737,7 @@ hipError_t hipFree(void* ptr)
|
||||
}
|
||||
} else {
|
||||
// free NULL pointer succeeds and is common technique to initialize runtime
|
||||
hipStatus = hipSuccess;
|
||||
hipStatus = hipSuccess;
|
||||
}
|
||||
|
||||
return ihipLogStatus(hipStatus);
|
||||
@@ -765,7 +765,7 @@ hipError_t hipHostFree(void* ptr)
|
||||
}
|
||||
} else {
|
||||
// free NULL pointer succeeds and is common technique to initialize runtime
|
||||
hipStatus = hipSuccess;
|
||||
hipStatus = hipSuccess;
|
||||
}
|
||||
|
||||
return ihipLogStatus(hipStatus);
|
||||
@@ -802,4 +802,16 @@ hipError_t hipFreeArray(hipArray* array)
|
||||
return ihipLogStatus(hipStatus);
|
||||
}
|
||||
|
||||
// Stubs of threadfence operations
|
||||
__device__ void __threadfence_block(void){
|
||||
// no-op
|
||||
}
|
||||
|
||||
__device__ void __threadfence(void){
|
||||
// no-op
|
||||
}
|
||||
|
||||
__device__ void __threadfence_system(void){
|
||||
// no-op
|
||||
}
|
||||
|
||||
|
||||
Ссылка в новой задаче
Block a user