diff --git a/projects/clr/hipamd/CMakeLists.txt b/projects/clr/hipamd/CMakeLists.txt index 8bf1b0c3b8..cbfc44b17d 100644 --- a/projects/clr/hipamd/CMakeLists.txt +++ b/projects/clr/hipamd/CMakeLists.txt @@ -222,6 +222,7 @@ if(HIP_PLATFORM STREQUAL "hcc") # Install .buildInfo install(FILES ${PROJECT_BINARY_DIR}/.buildInfo DESTINATION lib) + install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/src/hip_ir.ll DESTINATION lib) endif() # Install .version diff --git a/projects/clr/hipamd/bin/hipcc b/projects/clr/hipamd/bin/hipcc index 5c991bfc25..c4f592d814 100755 --- a/projects/clr/hipamd/bin/hipcc +++ b/projects/clr/hipamd/bin/hipcc @@ -196,6 +196,11 @@ if($HIP_PLATFORM eq "hcc"){ } } +if($HIP_PLATFORM eq "hcc"){ + $EXPORT_LL=" "; + $ENV{HCC_EXTRA_LIBRARIES}="$HIP_PATH/lib/hip_ir.ll\n"; +} + if($HIP_PLATFORM eq "nvcc"){ $ISACMD .= "$HIP_PATH/bin/hipcc -ptx "; if($ARGV[0] eq "--genco"){ diff --git a/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime.h b/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime.h index 47b75f282d..f0f1364997 100755 --- a/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime.h +++ b/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime.h @@ -533,7 +533,8 @@ __device__ float __dsqrt_rz(double x); * * @warning __threadfence_block is a stub and map to no-op. */ -__device__ void __threadfence_block(void); +// __device__ void __threadfence_block(void); +extern "C" __device__ void __threadfence_block(void); /** * @brief threadfence makes wirtes visible to other threads running on same GPU. @@ -544,7 +545,8 @@ __device__ void __threadfence_block(void); * * @warning __threadfence is a stub and map to no-op, application should set "export HSA_DISABLE_CACHE=1" to disable both L1 and L2 caches. */ -__device__ void __threadfence(void) __attribute__((deprecated("Provided for compile-time compatibility, not yet functional"))); +// __device__ void __threadfence(void) __attribute__((deprecated("Provided for compile-time compatibility, not yet functional"))); +extern "C" __device__ void __threadfence(void); /** * @brief threadfence_system makes writes to pinned system memory visible on host CPU. diff --git a/projects/clr/hipamd/src/hip_ir.ll b/projects/clr/hipamd/src/hip_ir.ll new file mode 100644 index 0000000000..6850293778 --- /dev/null +++ b/projects/clr/hipamd/src/hip_ir.ll @@ -0,0 +1,15 @@ +target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" +target triple = "amdgcn--amdhsa" + + +define void @__threadfence() #1 { + fence syncscope(2) seq_cst + ret void +} + +define void @__threadfence_block() #1 { + fence syncscope(3) seq_cst + ret void +} + +attributes #1 = { alwaysinline nounwind } diff --git a/projects/clr/hipamd/tests/src/deviceLib/hipThreadFence.cpp b/projects/clr/hipamd/tests/src/deviceLib/hipThreadFence.cpp new file mode 100644 index 0000000000..e73ccf6ad3 --- /dev/null +++ b/projects/clr/hipamd/tests/src/deviceLib/hipThreadFence.cpp @@ -0,0 +1,69 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include + +#define NUM 1024 +#define SIZE NUM*sizeof(float) + +__global__ void vAdd(hipLaunchParm lp, float *In1, float *In2, float *In3, float *In4, float *Out) +{ + int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + In4[tid] = In1[tid] + In2[tid]; + __threadfence(); + In3[tid] = In3[tid] + In4[tid]; + __threadfence_block(); + Out[tid] = In4[tid] + In3[tid]; + +} + +int main(){ + float *In1 = new float[1024]; + float *In2 = new float[1024]; + float *In3 = new float[1024]; + float *In4 = new float[1024]; + float *Out = new float[1024]; + + for(uint32_t i=0;i<1024;i++) + { + In1[i] = 1.0f; + In2[i] = 1.0f; + In3[i] = 1.0f; + In4[i] = 1.0f; + } + + float *In1d, *In2d, *In3d, *In4d, *Outd; + hipMalloc((void**)&In1d, SIZE); + hipMalloc((void**)&In2d, SIZE); + hipMalloc((void**)&In3d, SIZE); + hipMalloc((void**)&In4d, SIZE); + hipMalloc((void**)&Outd, SIZE); + + hipMemcpy(In1d, In1, SIZE, hipMemcpyHostToDevice); + hipMemcpy(In2d, In2, SIZE, hipMemcpyHostToDevice); + hipMemcpy(In3d, In3, SIZE, hipMemcpyHostToDevice); + hipMemcpy(In4d, In4, SIZE, hipMemcpyHostToDevice); + + hipLaunchKernel(vAdd, dim3(32,1,1), dim3(32,1,1), 0, 0, In1d, In2d, In3d, In4d, Outd); + hipMemcpy(Out, Outd, SIZE, hipMemcpyDeviceToHost); + assert(Out[10] == 2*In1[10] + 2*In2[10] + In3[10]); + +}