Add empty stubs for threadfence family routines, changes include:

- stubs and documentation in include/hcc_details/hip_runtime.h
    - stubs with "no-op" in src/hip_memory.cpp
    - document update in hip_kernel_language.md, add suggestions to
    disable L1 and L2 caches when using the threadfence routines.

Change-Id: Ic0753170f802003055bca9d7476d7f48817b98b7


[ROCm/clr commit: 6db08e5135]
Этот коммит содержится в:
pensun
2016-07-22 10:40:58 -05:00
родитель 64d0ffb404
Коммит 2c945dba23
3 изменённых файлов: 74 добавлений и 6 удалений
+3
Просмотреть файл
@@ -230,6 +230,9 @@ typedef struct dim3 {
## Memory-Fence Instructions
HIP support for __threadfence(), __threadfence_block() and __threadfence_system() is under development.
The stubs for the threadfence routines are defined in hcc_details/hip_runtime.h.
Applications that use these threadfence features should disable both of the L1 and L2 caches by:
"export HSA_DISABLE_CACHE=1"
## Synchronization Functions
The __syncthreads() built-in function is supported in HIP. The __syncthreads_count(int), __syncthreads_and(int) and __syncthreads_or(int) functions are under development.
+54 -1
Просмотреть файл
@@ -48,7 +48,7 @@ THE SOFTWARE.
#ifdef __HCC__
#include <grid_launch.h>
#if defined (GRID_LAUNCH_VERSION) and (GRID_LAUNCH_VERSION >= 20)
#if defined (GRID_LAUNCH_VERSION) and (GRID_LAUNCH_VERSION >= 20)
// Use field names for grid_launch 2.0 structure, if HCC supports GL 2.0.
#define USE_GRID_LAUNCH_20 1
#else
@@ -496,6 +496,59 @@ __device__ float __dsqrt_rz(double x);
* Kernel launching
*/
/**
*-------------------------------------------------------------------------------------------------
*-------------------------------------------------------------------------------------------------
* @defgroup Memory Fence Functions
* @{
*
*
* @warning The HIP memory fence functions are currently not supported yet.
* If any of those threadfence stubs are reached by the application, you should set "export HSA_DISABLE_CACHE=1" to disable L1 and L2 caches.
*
*
* On AMD platforms, the threadfence* routines are currently empty stubs.
*/
/**
* @brief threadfence_block makes writes visible to threads running in same block.
*
* @Returns void
*
* @param void
*
* @warning __threadfence_block is a stub and map to no-op.
*/
__device__ void __threadfence_block(void);
/**
* @brief threadfence makes wirtes visible to other threads running on same GPU.
*
* @Returns void
*
* @param void
*
* @warning __threadfence is a stub and map to no-op, application should set "export HSA_DISABLE_CACHE=1" to disable both L1 and L2 caches.
*/
__device__ void __threadfence(void);
/**
* @brief threadfence_system makes writes to pinned system memory visible on host CPU.
*
* @Returns void
*
* @param void
*
* @warning __threadfence_system is a stub and map to no-op, application should set "export HSA_DISABLE_CACHE=1" to disable both L1 and L2 caches.
*/
__device__ void __threadfence_system(void);
// doxygen end Memory Fence
/**
* @}
*/
#define hipThreadIdx_x (hc_get_workitem_id(0))
#define hipThreadIdx_y (hc_get_workitem_id(1))
+17 -5
Просмотреть файл
@@ -295,7 +295,7 @@ hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc,
if (crit->peerCnt() > 1) { // peerCnt includes self so only call allow_access if other peers involved:
hsa_status_t hsa_status = hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr);
if (hsa_status != HSA_STATUS_SUCCESS) {
hip_status = hipErrorMemoryAllocation;
hip_status = hipErrorMemoryAllocation;
}
}
}
@@ -443,7 +443,7 @@ hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind
/**
* @result #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidMemcpyDirection,
* @result #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidMemcpyDirection,
* @result #hipErrorInvalidValue : If dst==NULL or src==NULL, or other bad argument.
* @warning on HCC hipMemcpyAsync does not support overlapped H2D and D2H copies.
* @warning on HCC hipMemcpyAsync requires that any host pointers are pinned (ie via the hipMallocHost call).
@@ -703,7 +703,7 @@ hipError_t hipMemGetInfo (size_t *free, size_t *total)
size_t deviceMemSize, hostMemSize, userMemSize;
hc::am_memtracker_sizeinfo(hipDevice->_acc, &deviceMemSize, &hostMemSize, &userMemSize);
printf ("deviceMemSize=%zu\n", deviceMemSize);
*free = hipDevice->_props.totalGlobalMem - deviceMemSize;
}
@@ -737,7 +737,7 @@ hipError_t hipFree(void* ptr)
}
} else {
// free NULL pointer succeeds and is common technique to initialize runtime
hipStatus = hipSuccess;
hipStatus = hipSuccess;
}
return ihipLogStatus(hipStatus);
@@ -765,7 +765,7 @@ hipError_t hipHostFree(void* ptr)
}
} else {
// free NULL pointer succeeds and is common technique to initialize runtime
hipStatus = hipSuccess;
hipStatus = hipSuccess;
}
return ihipLogStatus(hipStatus);
@@ -802,4 +802,16 @@ hipError_t hipFreeArray(hipArray* array)
return ihipLogStatus(hipStatus);
}
// Stubs of threadfence operations
__device__ void __threadfence_block(void){
// no-op
}
__device__ void __threadfence(void){
// no-op
}
__device__ void __threadfence_system(void){
// no-op
}