From b01ac269486a356cb05dc0c1d8e7a3480b30afaf Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Tue, 23 Oct 2018 13:20:49 +0530 Subject: [PATCH] Make HIP functional again with HCC from ROCm 1.9.x Change-Id: I214acdfd0b79dcf783993e44fe31baee64fd4dc3 --- hipamd/Jenkinsfile | 49 +++++++++++- .../include/hip/hcc_detail/device_functions.h | 74 +++++++++++++++++++ .../hip/hcc_detail/device_library_decls.h | 24 ++++++ hipamd/src/hip_stream.cpp | 17 +++++ 4 files changed, 162 insertions(+), 2 deletions(-) diff --git a/hipamd/Jenkinsfile b/hipamd/Jenkinsfile index 540e7e6689..6d37f10e3c 100644 --- a/hipamd/Jenkinsfile +++ b/hipamd/Jenkinsfile @@ -296,8 +296,53 @@ def docker_upload_dockerhub( String local_org, String image_name, String remote_ String build_config = 'Release' String job_name = env.JOB_NAME.toLowerCase( ) -// The following launches 2 builds in parallel: rocm-head and cuda-9.x -parallel rocm_head: +// The following launches 3 builds in parallel: rocm-head, rocm-1.9.x and cuda-9.x +parallel rocm_1_9: +{ + node('hip-rocm') + { + String hcc_ver = 'rocm-1.9.x' + String from_image = 'ci_test_nodes/rocm-1.9.x/ubuntu-16.04:latest' + String inside_args = '--device=/dev/kfd --device=/dev/dri --group-add=video' + + // Checkout source code, dependencies and version files + String source_hip_rel = checkout_and_version( hcc_ver ) + + // Create/reuse a docker image that represents the hip build environment + def hip_build_image = docker_build_image( hcc_ver, 'hip', '', source_hip_rel, from_image ) + + // Print system information for the log + hip_build_image.inside( inside_args ) + { + sh """#!/usr/bin/env bash + set -x + /opt/rocm/bin/rocm_agent_enumerator -t ALL + /opt/rocm/bin/hcc --version + """ + } + + // Conctruct a binary directory path based on build config + String build_hip_rel = build_directory_rel( build_config ); + + // Build hip inside of the build environment + docker_build_inside_image( hip_build_image, inside_args, hcc_ver, '', build_config, source_hip_rel, build_hip_rel ) + + // Clean docker build image + docker_clean_images( 'hip', docker_build_image_name( ) ) + + // After a successful build, upload a docker image of the results + /* + String hip_image_name = docker_upload_artifactory( hcc_ver, job_name, from_image, source_hip_rel, build_hip_rel ) + if( params.push_image_to_docker_hub ) + { + docker_upload_dockerhub( job_name, hip_image_name, 'rocm' ) + docker_clean_images( 'rocm', hip_image_name ) + } + docker_clean_images( job_name, hip_image_name ) + */ + } +}, +rocm_head: { node('hip-rocm') { diff --git a/hipamd/include/hip/hcc_detail/device_functions.h b/hipamd/include/hip/hcc_detail/device_functions.h index 42927e3246..2d5f73c153 100644 --- a/hipamd/include/hip/hcc_detail/device_functions.h +++ b/hipamd/include/hip/hcc_detail/device_functions.h @@ -752,6 +752,80 @@ void *__amdgcn_get_dynamicgroupbaseptr() { return __get_dynamicgroupbaseptr(); } +#if defined(__HCC__) && (__hcc_minor__ < 3) +// hip.amdgcn.bc - sync threads +#define __CLK_LOCAL_MEM_FENCE 0x01 +typedef unsigned __cl_mem_fence_flags; + +typedef enum __memory_scope { + __memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM, + __memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP, + __memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE, + __memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES, + __memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP +} __memory_scope; + +// enum values aligned with what clang uses in EmitAtomicExpr() +typedef enum __memory_order +{ + __memory_order_relaxed = __ATOMIC_RELAXED, + __memory_order_acquire = __ATOMIC_ACQUIRE, + __memory_order_release = __ATOMIC_RELEASE, + __memory_order_acq_rel = __ATOMIC_ACQ_REL, + __memory_order_seq_cst = __ATOMIC_SEQ_CST +} __memory_order; + +__device__ +inline +static void +__atomic_work_item_fence(__cl_mem_fence_flags flags, __memory_order order, __memory_scope scope) +{ + // We're tying global-happens-before and local-happens-before together as does HSA + if (order != __memory_order_relaxed) { + switch (scope) { + case __memory_scope_work_item: + break; + case __memory_scope_sub_group: + switch (order) { + case __memory_order_relaxed: break; + case __memory_order_acquire: __llvm_fence_acq_sg(); break; + case __memory_order_release: __llvm_fence_rel_sg(); break; + case __memory_order_acq_rel: __llvm_fence_ar_sg(); break; + case __memory_order_seq_cst: __llvm_fence_sc_sg(); break; + } + break; + case __memory_scope_work_group: + switch (order) { + case __memory_order_relaxed: break; + case __memory_order_acquire: __llvm_fence_acq_wg(); break; + case __memory_order_release: __llvm_fence_rel_wg(); break; + case __memory_order_acq_rel: __llvm_fence_ar_wg(); break; + case __memory_order_seq_cst: __llvm_fence_sc_wg(); break; + } + break; + case __memory_scope_device: + switch (order) { + case __memory_order_relaxed: break; + case __memory_order_acquire: __llvm_fence_acq_dev(); break; + case __memory_order_release: __llvm_fence_rel_dev(); break; + case __memory_order_acq_rel: __llvm_fence_ar_dev(); break; + case __memory_order_seq_cst: __llvm_fence_sc_dev(); break; + } + break; + case __memory_scope_all_svm_devices: + switch (order) { + case __memory_order_relaxed: break; + case __memory_order_acquire: __llvm_fence_acq_sys(); break; + case __memory_order_release: __llvm_fence_rel_sys(); break; + case __memory_order_acq_rel: __llvm_fence_ar_sys(); break; + case __memory_order_seq_cst: __llvm_fence_sc_sys(); break; + } + break; + } + } +} +#endif + // Memory Fence Functions __device__ inline diff --git a/hipamd/include/hip/hcc_detail/device_library_decls.h b/hipamd/include/hip/hcc_detail/device_library_decls.h index 2a0b968fe3..2bf3c8cc51 100644 --- a/hipamd/include/hip/hcc_detail/device_library_decls.h +++ b/hipamd/include/hip/hcc_detail/device_library_decls.h @@ -65,6 +65,29 @@ extern "C" __device__ __attribute__((const)) float __ocml_fmax_f32(float, float) __device__ inline static __local void* __to_local(unsigned x) { return (__local void*)x; } #endif //__HIP_DEVICE_COMPILE__ +#if defined(__HCC__) && (__hcc_minor__ < 3) +// __llvm_fence* functions from device-libs/irif/src/fence.ll +extern "C" __device__ void __llvm_fence_acq_sg(void); +extern "C" __device__ void __llvm_fence_acq_wg(void); +extern "C" __device__ void __llvm_fence_acq_dev(void); +extern "C" __device__ void __llvm_fence_acq_sys(void); + +extern "C" __device__ void __llvm_fence_rel_sg(void); +extern "C" __device__ void __llvm_fence_rel_wg(void); +extern "C" __device__ void __llvm_fence_rel_dev(void); +extern "C" __device__ void __llvm_fence_rel_sys(void); + +extern "C" __device__ void __llvm_fence_ar_sg(void); +extern "C" __device__ void __llvm_fence_ar_wg(void); +extern "C" __device__ void __llvm_fence_ar_dev(void); +extern "C" __device__ void __llvm_fence_ar_sys(void); + + +extern "C" __device__ void __llvm_fence_sc_sg(void); +extern "C" __device__ void __llvm_fence_sc_wg(void); +extern "C" __device__ void __llvm_fence_sc_dev(void); +extern "C" __device__ void __llvm_fence_sc_sys(void); +#else // Using hip.amdgcn.bc - sync threads #define __CLK_LOCAL_MEM_FENCE 0x01 typedef unsigned __cl_mem_fence_flags; @@ -90,5 +113,6 @@ typedef enum __memory_order // Linked from hip.amdgcn.bc extern "C" __device__ void __atomic_work_item_fence(__cl_mem_fence_flags, __memory_order, __memory_scope); +#endif #endif diff --git a/hipamd/src/hip_stream.cpp b/hipamd/src/hip_stream.cpp index 1e239cb2fc..2268a203dd 100644 --- a/hipamd/src/hip_stream.cpp +++ b/hipamd/src/hip_stream.cpp @@ -31,12 +31,21 @@ THE SOFTWARE. //------------------------------------------------------------------------------------------------- // Stream // +#if defined(__HCC__) && (__hcc_minor__ < 3) +enum queue_priority +{ + priority_high = 0, + priority_normal = 0, + priority_low = 0 +}; +#else enum queue_priority { priority_high = Kalmar::priority_high, priority_normal = Kalmar::priority_normal, priority_low = Kalmar::priority_low }; +#endif //--- hipError_t ihipStreamCreate(hipStream_t* stream, unsigned int flags, int priority) { @@ -59,7 +68,11 @@ hipError_t ihipStreamCreate(hipStream_t* stream, unsigned int flags, int priorit // Obtain mutex access to the device critical data, release by destructor LockedAccessor_CtxCrit_t ctxCrit(ctx->criticalData()); +#if defined(__HCC__) && (__hcc_minor__ < 3) + auto istream = new ihipStream_t(ctx, acc.create_view(), flags); +#else auto istream = new ihipStream_t(ctx, acc.create_view(Kalmar::execute_in_order, Kalmar::queuing_mode_automatic, (Kalmar::queue_priority)priority), flags); +#endif ctxCrit->addStream(istream); *stream = istream; @@ -223,8 +236,12 @@ hipError_t hipStreamGetPriority(hipStream_t stream, int* priority) { } else if (stream == hipStreamNull) { return ihipLogStatus(hipErrorInvalidResourceHandle); } else { +#if defined(__HCC__) && (__hcc_minor__ < 3) + *priority = 0; +#else LockedAccessor_StreamCrit_t crit(stream->_criticalData); *priority = crit->_av.get_queue_priority(); +#endif return ihipLogStatus(hipSuccess); } }