Make HIP functional again with HCC from ROCm 1.9.x

Change-Id: I214acdfd0b79dcf783993e44fe31baee64fd4dc3
Este commit está contenido en:
Maneesh Gupta
2018-10-23 13:20:49 +05:30
padre 7631fd7f90
commit b01ac26948
Se han modificado 4 ficheros con 162 adiciones y 2 borrados
+47 -2
Ver fichero
@@ -296,8 +296,53 @@ def docker_upload_dockerhub( String local_org, String image_name, String remote_
String build_config = 'Release'
String job_name = env.JOB_NAME.toLowerCase( )
// The following launches 2 builds in parallel: rocm-head and cuda-9.x
parallel rocm_head:
// The following launches 3 builds in parallel: rocm-head, rocm-1.9.x and cuda-9.x
parallel rocm_1_9:
{
node('hip-rocm')
{
String hcc_ver = 'rocm-1.9.x'
String from_image = 'ci_test_nodes/rocm-1.9.x/ubuntu-16.04:latest'
String inside_args = '--device=/dev/kfd --device=/dev/dri --group-add=video'
// Checkout source code, dependencies and version files
String source_hip_rel = checkout_and_version( hcc_ver )
// Create/reuse a docker image that represents the hip build environment
def hip_build_image = docker_build_image( hcc_ver, 'hip', '', source_hip_rel, from_image )
// Print system information for the log
hip_build_image.inside( inside_args )
{
sh """#!/usr/bin/env bash
set -x
/opt/rocm/bin/rocm_agent_enumerator -t ALL
/opt/rocm/bin/hcc --version
"""
}
// Conctruct a binary directory path based on build config
String build_hip_rel = build_directory_rel( build_config );
// Build hip inside of the build environment
docker_build_inside_image( hip_build_image, inside_args, hcc_ver, '', build_config, source_hip_rel, build_hip_rel )
// Clean docker build image
docker_clean_images( 'hip', docker_build_image_name( ) )
// After a successful build, upload a docker image of the results
/*
String hip_image_name = docker_upload_artifactory( hcc_ver, job_name, from_image, source_hip_rel, build_hip_rel )
if( params.push_image_to_docker_hub )
{
docker_upload_dockerhub( job_name, hip_image_name, 'rocm' )
docker_clean_images( 'rocm', hip_image_name )
}
docker_clean_images( job_name, hip_image_name )
*/
}
},
rocm_head:
{
node('hip-rocm')
{
@@ -752,6 +752,80 @@ void *__amdgcn_get_dynamicgroupbaseptr() {
return __get_dynamicgroupbaseptr();
}
#if defined(__HCC__) && (__hcc_minor__ < 3)
// hip.amdgcn.bc - sync threads
#define __CLK_LOCAL_MEM_FENCE 0x01
typedef unsigned __cl_mem_fence_flags;
typedef enum __memory_scope {
__memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
__memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
__memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
__memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
__memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP
} __memory_scope;
// enum values aligned with what clang uses in EmitAtomicExpr()
typedef enum __memory_order
{
__memory_order_relaxed = __ATOMIC_RELAXED,
__memory_order_acquire = __ATOMIC_ACQUIRE,
__memory_order_release = __ATOMIC_RELEASE,
__memory_order_acq_rel = __ATOMIC_ACQ_REL,
__memory_order_seq_cst = __ATOMIC_SEQ_CST
} __memory_order;
__device__
inline
static void
__atomic_work_item_fence(__cl_mem_fence_flags flags, __memory_order order, __memory_scope scope)
{
// We're tying global-happens-before and local-happens-before together as does HSA
if (order != __memory_order_relaxed) {
switch (scope) {
case __memory_scope_work_item:
break;
case __memory_scope_sub_group:
switch (order) {
case __memory_order_relaxed: break;
case __memory_order_acquire: __llvm_fence_acq_sg(); break;
case __memory_order_release: __llvm_fence_rel_sg(); break;
case __memory_order_acq_rel: __llvm_fence_ar_sg(); break;
case __memory_order_seq_cst: __llvm_fence_sc_sg(); break;
}
break;
case __memory_scope_work_group:
switch (order) {
case __memory_order_relaxed: break;
case __memory_order_acquire: __llvm_fence_acq_wg(); break;
case __memory_order_release: __llvm_fence_rel_wg(); break;
case __memory_order_acq_rel: __llvm_fence_ar_wg(); break;
case __memory_order_seq_cst: __llvm_fence_sc_wg(); break;
}
break;
case __memory_scope_device:
switch (order) {
case __memory_order_relaxed: break;
case __memory_order_acquire: __llvm_fence_acq_dev(); break;
case __memory_order_release: __llvm_fence_rel_dev(); break;
case __memory_order_acq_rel: __llvm_fence_ar_dev(); break;
case __memory_order_seq_cst: __llvm_fence_sc_dev(); break;
}
break;
case __memory_scope_all_svm_devices:
switch (order) {
case __memory_order_relaxed: break;
case __memory_order_acquire: __llvm_fence_acq_sys(); break;
case __memory_order_release: __llvm_fence_rel_sys(); break;
case __memory_order_acq_rel: __llvm_fence_ar_sys(); break;
case __memory_order_seq_cst: __llvm_fence_sc_sys(); break;
}
break;
}
}
}
#endif
// Memory Fence Functions
__device__
inline
@@ -65,6 +65,29 @@ extern "C" __device__ __attribute__((const)) float __ocml_fmax_f32(float, float)
__device__ inline static __local void* __to_local(unsigned x) { return (__local void*)x; }
#endif //__HIP_DEVICE_COMPILE__
#if defined(__HCC__) && (__hcc_minor__ < 3)
// __llvm_fence* functions from device-libs/irif/src/fence.ll
extern "C" __device__ void __llvm_fence_acq_sg(void);
extern "C" __device__ void __llvm_fence_acq_wg(void);
extern "C" __device__ void __llvm_fence_acq_dev(void);
extern "C" __device__ void __llvm_fence_acq_sys(void);
extern "C" __device__ void __llvm_fence_rel_sg(void);
extern "C" __device__ void __llvm_fence_rel_wg(void);
extern "C" __device__ void __llvm_fence_rel_dev(void);
extern "C" __device__ void __llvm_fence_rel_sys(void);
extern "C" __device__ void __llvm_fence_ar_sg(void);
extern "C" __device__ void __llvm_fence_ar_wg(void);
extern "C" __device__ void __llvm_fence_ar_dev(void);
extern "C" __device__ void __llvm_fence_ar_sys(void);
extern "C" __device__ void __llvm_fence_sc_sg(void);
extern "C" __device__ void __llvm_fence_sc_wg(void);
extern "C" __device__ void __llvm_fence_sc_dev(void);
extern "C" __device__ void __llvm_fence_sc_sys(void);
#else
// Using hip.amdgcn.bc - sync threads
#define __CLK_LOCAL_MEM_FENCE 0x01
typedef unsigned __cl_mem_fence_flags;
@@ -90,5 +113,6 @@ typedef enum __memory_order
// Linked from hip.amdgcn.bc
extern "C" __device__ void
__atomic_work_item_fence(__cl_mem_fence_flags, __memory_order, __memory_scope);
#endif
#endif
+17
Ver fichero
@@ -31,12 +31,21 @@ THE SOFTWARE.
//-------------------------------------------------------------------------------------------------
// Stream
//
#if defined(__HCC__) && (__hcc_minor__ < 3)
enum queue_priority
{
priority_high = 0,
priority_normal = 0,
priority_low = 0
};
#else
enum queue_priority
{
priority_high = Kalmar::priority_high,
priority_normal = Kalmar::priority_normal,
priority_low = Kalmar::priority_low
};
#endif
//---
hipError_t ihipStreamCreate(hipStream_t* stream, unsigned int flags, int priority) {
@@ -59,7 +68,11 @@ hipError_t ihipStreamCreate(hipStream_t* stream, unsigned int flags, int priorit
// Obtain mutex access to the device critical data, release by destructor
LockedAccessor_CtxCrit_t ctxCrit(ctx->criticalData());
#if defined(__HCC__) && (__hcc_minor__ < 3)
auto istream = new ihipStream_t(ctx, acc.create_view(), flags);
#else
auto istream = new ihipStream_t(ctx, acc.create_view(Kalmar::execute_in_order, Kalmar::queuing_mode_automatic, (Kalmar::queue_priority)priority), flags);
#endif
ctxCrit->addStream(istream);
*stream = istream;
@@ -223,8 +236,12 @@ hipError_t hipStreamGetPriority(hipStream_t stream, int* priority) {
} else if (stream == hipStreamNull) {
return ihipLogStatus(hipErrorInvalidResourceHandle);
} else {
#if defined(__HCC__) && (__hcc_minor__ < 3)
*priority = 0;
#else
LockedAccessor_StreamCrit_t crit(stream->_criticalData);
*priority = crit->_av.get_queue_priority();
#endif
return ihipLogStatus(hipSuccess);
}
}