Make HIP functional again with HCC from ROCm 1.9.x
Change-Id: I214acdfd0b79dcf783993e44fe31baee64fd4dc3
Este commit está contenido en:
vendido
+47
-2
@@ -296,8 +296,53 @@ def docker_upload_dockerhub( String local_org, String image_name, String remote_
|
||||
String build_config = 'Release'
|
||||
String job_name = env.JOB_NAME.toLowerCase( )
|
||||
|
||||
// The following launches 2 builds in parallel: rocm-head and cuda-9.x
|
||||
parallel rocm_head:
|
||||
// The following launches 3 builds in parallel: rocm-head, rocm-1.9.x and cuda-9.x
|
||||
parallel rocm_1_9:
|
||||
{
|
||||
node('hip-rocm')
|
||||
{
|
||||
String hcc_ver = 'rocm-1.9.x'
|
||||
String from_image = 'ci_test_nodes/rocm-1.9.x/ubuntu-16.04:latest'
|
||||
String inside_args = '--device=/dev/kfd --device=/dev/dri --group-add=video'
|
||||
|
||||
// Checkout source code, dependencies and version files
|
||||
String source_hip_rel = checkout_and_version( hcc_ver )
|
||||
|
||||
// Create/reuse a docker image that represents the hip build environment
|
||||
def hip_build_image = docker_build_image( hcc_ver, 'hip', '', source_hip_rel, from_image )
|
||||
|
||||
// Print system information for the log
|
||||
hip_build_image.inside( inside_args )
|
||||
{
|
||||
sh """#!/usr/bin/env bash
|
||||
set -x
|
||||
/opt/rocm/bin/rocm_agent_enumerator -t ALL
|
||||
/opt/rocm/bin/hcc --version
|
||||
"""
|
||||
}
|
||||
|
||||
// Conctruct a binary directory path based on build config
|
||||
String build_hip_rel = build_directory_rel( build_config );
|
||||
|
||||
// Build hip inside of the build environment
|
||||
docker_build_inside_image( hip_build_image, inside_args, hcc_ver, '', build_config, source_hip_rel, build_hip_rel )
|
||||
|
||||
// Clean docker build image
|
||||
docker_clean_images( 'hip', docker_build_image_name( ) )
|
||||
|
||||
// After a successful build, upload a docker image of the results
|
||||
/*
|
||||
String hip_image_name = docker_upload_artifactory( hcc_ver, job_name, from_image, source_hip_rel, build_hip_rel )
|
||||
if( params.push_image_to_docker_hub )
|
||||
{
|
||||
docker_upload_dockerhub( job_name, hip_image_name, 'rocm' )
|
||||
docker_clean_images( 'rocm', hip_image_name )
|
||||
}
|
||||
docker_clean_images( job_name, hip_image_name )
|
||||
*/
|
||||
}
|
||||
},
|
||||
rocm_head:
|
||||
{
|
||||
node('hip-rocm')
|
||||
{
|
||||
|
||||
@@ -752,6 +752,80 @@ void *__amdgcn_get_dynamicgroupbaseptr() {
|
||||
return __get_dynamicgroupbaseptr();
|
||||
}
|
||||
|
||||
#if defined(__HCC__) && (__hcc_minor__ < 3)
|
||||
// hip.amdgcn.bc - sync threads
|
||||
#define __CLK_LOCAL_MEM_FENCE 0x01
|
||||
typedef unsigned __cl_mem_fence_flags;
|
||||
|
||||
typedef enum __memory_scope {
|
||||
__memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
|
||||
__memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
|
||||
__memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
|
||||
__memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
|
||||
__memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP
|
||||
} __memory_scope;
|
||||
|
||||
// enum values aligned with what clang uses in EmitAtomicExpr()
|
||||
typedef enum __memory_order
|
||||
{
|
||||
__memory_order_relaxed = __ATOMIC_RELAXED,
|
||||
__memory_order_acquire = __ATOMIC_ACQUIRE,
|
||||
__memory_order_release = __ATOMIC_RELEASE,
|
||||
__memory_order_acq_rel = __ATOMIC_ACQ_REL,
|
||||
__memory_order_seq_cst = __ATOMIC_SEQ_CST
|
||||
} __memory_order;
|
||||
|
||||
__device__
|
||||
inline
|
||||
static void
|
||||
__atomic_work_item_fence(__cl_mem_fence_flags flags, __memory_order order, __memory_scope scope)
|
||||
{
|
||||
// We're tying global-happens-before and local-happens-before together as does HSA
|
||||
if (order != __memory_order_relaxed) {
|
||||
switch (scope) {
|
||||
case __memory_scope_work_item:
|
||||
break;
|
||||
case __memory_scope_sub_group:
|
||||
switch (order) {
|
||||
case __memory_order_relaxed: break;
|
||||
case __memory_order_acquire: __llvm_fence_acq_sg(); break;
|
||||
case __memory_order_release: __llvm_fence_rel_sg(); break;
|
||||
case __memory_order_acq_rel: __llvm_fence_ar_sg(); break;
|
||||
case __memory_order_seq_cst: __llvm_fence_sc_sg(); break;
|
||||
}
|
||||
break;
|
||||
case __memory_scope_work_group:
|
||||
switch (order) {
|
||||
case __memory_order_relaxed: break;
|
||||
case __memory_order_acquire: __llvm_fence_acq_wg(); break;
|
||||
case __memory_order_release: __llvm_fence_rel_wg(); break;
|
||||
case __memory_order_acq_rel: __llvm_fence_ar_wg(); break;
|
||||
case __memory_order_seq_cst: __llvm_fence_sc_wg(); break;
|
||||
}
|
||||
break;
|
||||
case __memory_scope_device:
|
||||
switch (order) {
|
||||
case __memory_order_relaxed: break;
|
||||
case __memory_order_acquire: __llvm_fence_acq_dev(); break;
|
||||
case __memory_order_release: __llvm_fence_rel_dev(); break;
|
||||
case __memory_order_acq_rel: __llvm_fence_ar_dev(); break;
|
||||
case __memory_order_seq_cst: __llvm_fence_sc_dev(); break;
|
||||
}
|
||||
break;
|
||||
case __memory_scope_all_svm_devices:
|
||||
switch (order) {
|
||||
case __memory_order_relaxed: break;
|
||||
case __memory_order_acquire: __llvm_fence_acq_sys(); break;
|
||||
case __memory_order_release: __llvm_fence_rel_sys(); break;
|
||||
case __memory_order_acq_rel: __llvm_fence_ar_sys(); break;
|
||||
case __memory_order_seq_cst: __llvm_fence_sc_sys(); break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Memory Fence Functions
|
||||
__device__
|
||||
inline
|
||||
|
||||
@@ -65,6 +65,29 @@ extern "C" __device__ __attribute__((const)) float __ocml_fmax_f32(float, float)
|
||||
__device__ inline static __local void* __to_local(unsigned x) { return (__local void*)x; }
|
||||
#endif //__HIP_DEVICE_COMPILE__
|
||||
|
||||
#if defined(__HCC__) && (__hcc_minor__ < 3)
|
||||
// __llvm_fence* functions from device-libs/irif/src/fence.ll
|
||||
extern "C" __device__ void __llvm_fence_acq_sg(void);
|
||||
extern "C" __device__ void __llvm_fence_acq_wg(void);
|
||||
extern "C" __device__ void __llvm_fence_acq_dev(void);
|
||||
extern "C" __device__ void __llvm_fence_acq_sys(void);
|
||||
|
||||
extern "C" __device__ void __llvm_fence_rel_sg(void);
|
||||
extern "C" __device__ void __llvm_fence_rel_wg(void);
|
||||
extern "C" __device__ void __llvm_fence_rel_dev(void);
|
||||
extern "C" __device__ void __llvm_fence_rel_sys(void);
|
||||
|
||||
extern "C" __device__ void __llvm_fence_ar_sg(void);
|
||||
extern "C" __device__ void __llvm_fence_ar_wg(void);
|
||||
extern "C" __device__ void __llvm_fence_ar_dev(void);
|
||||
extern "C" __device__ void __llvm_fence_ar_sys(void);
|
||||
|
||||
|
||||
extern "C" __device__ void __llvm_fence_sc_sg(void);
|
||||
extern "C" __device__ void __llvm_fence_sc_wg(void);
|
||||
extern "C" __device__ void __llvm_fence_sc_dev(void);
|
||||
extern "C" __device__ void __llvm_fence_sc_sys(void);
|
||||
#else
|
||||
// Using hip.amdgcn.bc - sync threads
|
||||
#define __CLK_LOCAL_MEM_FENCE 0x01
|
||||
typedef unsigned __cl_mem_fence_flags;
|
||||
@@ -90,5 +113,6 @@ typedef enum __memory_order
|
||||
// Linked from hip.amdgcn.bc
|
||||
extern "C" __device__ void
|
||||
__atomic_work_item_fence(__cl_mem_fence_flags, __memory_order, __memory_scope);
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -31,12 +31,21 @@ THE SOFTWARE.
|
||||
//-------------------------------------------------------------------------------------------------
|
||||
// Stream
|
||||
//
|
||||
#if defined(__HCC__) && (__hcc_minor__ < 3)
|
||||
enum queue_priority
|
||||
{
|
||||
priority_high = 0,
|
||||
priority_normal = 0,
|
||||
priority_low = 0
|
||||
};
|
||||
#else
|
||||
enum queue_priority
|
||||
{
|
||||
priority_high = Kalmar::priority_high,
|
||||
priority_normal = Kalmar::priority_normal,
|
||||
priority_low = Kalmar::priority_low
|
||||
};
|
||||
#endif
|
||||
|
||||
//---
|
||||
hipError_t ihipStreamCreate(hipStream_t* stream, unsigned int flags, int priority) {
|
||||
@@ -59,7 +68,11 @@ hipError_t ihipStreamCreate(hipStream_t* stream, unsigned int flags, int priorit
|
||||
// Obtain mutex access to the device critical data, release by destructor
|
||||
LockedAccessor_CtxCrit_t ctxCrit(ctx->criticalData());
|
||||
|
||||
#if defined(__HCC__) && (__hcc_minor__ < 3)
|
||||
auto istream = new ihipStream_t(ctx, acc.create_view(), flags);
|
||||
#else
|
||||
auto istream = new ihipStream_t(ctx, acc.create_view(Kalmar::execute_in_order, Kalmar::queuing_mode_automatic, (Kalmar::queue_priority)priority), flags);
|
||||
#endif
|
||||
|
||||
ctxCrit->addStream(istream);
|
||||
*stream = istream;
|
||||
@@ -223,8 +236,12 @@ hipError_t hipStreamGetPriority(hipStream_t stream, int* priority) {
|
||||
} else if (stream == hipStreamNull) {
|
||||
return ihipLogStatus(hipErrorInvalidResourceHandle);
|
||||
} else {
|
||||
#if defined(__HCC__) && (__hcc_minor__ < 3)
|
||||
*priority = 0;
|
||||
#else
|
||||
LockedAccessor_StreamCrit_t crit(stream->_criticalData);
|
||||
*priority = crit->_av.get_queue_priority();
|
||||
#endif
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
}
|
||||
|
||||
Referencia en una nueva incidencia
Block a user