From b754de37c1bf6a9aaad221ff6101f6c79ede66a4 Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Wed, 24 Jul 2019 03:51:17 -0400 Subject: [PATCH] Add GFX908 specific changes to HIP (#1229) * Add GFX908 specific for HIP * Fix missing __halfTest in hipTestNativeHalf --- CMakeLists.txt | 2 +- bin/hipcc | 23 +++++++++++++++++++++++ lpl_ca/ca.hpp | 2 +- lpl_ca/common.hpp | 2 +- lpl_ca/lpl.hpp | 2 +- tests/src/deviceLib/hipMathFunctions.cpp | 2 +- tests/src/deviceLib/hipTestHalf.cpp | 2 +- tests/src/deviceLib/hipTestNativeHalf.cpp | 2 +- 8 files changed, 30 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8701fe5635..187b32045b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -254,7 +254,7 @@ if(HIP_PLATFORM STREQUAL "hcc") execute_process(COMMAND ${HCC_HOME}/bin/hcc-config --ldflags OUTPUT_VARIABLE HCC_LD_FLAGS) set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${HCC_LD_FLAGS}") - set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --amdgpu-target=gfx701 --amdgpu-target=gfx803 --amdgpu-target=gfx900 --amdgpu-target=gfx906") + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --amdgpu-target=gfx701 --amdgpu-target=gfx803 --amdgpu-target=gfx900 --amdgpu-target=gfx906 --amdgpu-target=gfx908") if(COMPILE_HIP_ATP_MARKER) set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -L/opt/rocm/profiler/CXLActivityLogger/bin/x86_64 -lCXLActivityLogger") endif() diff --git a/bin/hipcc b/bin/hipcc index d2fa9a2306..85e090c761 100755 --- a/bin/hipcc +++ b/bin/hipcc @@ -156,6 +156,7 @@ $target_gfx802 = 0; $target_gfx803 = 0; $target_gfx900 = 0; $target_gfx906 = 0; +$target_gfx908 = 0; $target_gfx1010 = 0; $target_gfx1012 = 0; $default_amdgpu_target = 1; @@ -429,6 +430,11 @@ foreach $arg (@ARGV) $target_gfx906 = 1; $default_amdgpu_target = 0; } + if($arg eq '--amdgpu-target=gfx908') + { + $target_gfx908 = 1; + $default_amdgpu_target = 0; + } if($arg eq '--amdgpu-target=gfx1010') { $target_gfx1010 = 1; @@ -670,6 +676,11 @@ if($HIP_PLATFORM eq "hcc" or $HIP_PLATFORM eq "clang"){ $target_gfx906 = 1; $default_amdgpu_target = 0; } + if($target eq 'gfx908') + { + $target_gfx908 = 1; + $default_amdgpu_target = 0; + } if($target eq 'gfx1010') { $target_gfx1010 = 1; @@ -715,6 +726,10 @@ if($HIP_PLATFORM eq "hcc" or $HIP_PLATFORM eq "clang"){ $target_gfx906 = 1; $default_amdgpu_target = 0; } + if($val eq "gfx908") { + $target_gfx908 = 1; + $default_amdgpu_target = 0; + } if($val eq 'gfx1010') { $target_gfx1010 = 1; @@ -789,6 +804,14 @@ if($HIP_PLATFORM eq "hcc" or $HIP_PLATFORM eq "clang"){ } $HIPCXXFLAGS .= " -D__HIP_ARCH_GFX906__=1 "; } + if ($target_gfx908 eq 1) { + $GPU_ARCH_ARG = $GPU_ARCH_OPT . "gfx908"; + $HIPLDFLAGS .= $GPU_ARCH_ARG; + if ($HIP_PLATFORM eq 'clang') { + $HIPCXXFLAGS .= $GPU_ARCH_ARG;; + } + $HIPCXXFLAGS .= " -D__HIP_ARCH_GFX908__=1 "; + } if ($target_gfx1010 eq 1) { $GPU_ARCH_ARG = $GPU_ARCH_OPT . "gfx1010"; $HIPLDFLAGS .= $GPU_ARCH_ARG; diff --git a/lpl_ca/ca.hpp b/lpl_ca/ca.hpp index 0ef8458c20..db63f02498 100644 --- a/lpl_ca/ca.hpp +++ b/lpl_ca/ca.hpp @@ -23,7 +23,7 @@ inline clara::Parser cmdline_parser(bool& help, std::vector& inputs "https://reviews.llvm.org/D13909; " "the code object format is documented at: " "https://www.llvm.org/docs/AMDGPUUsage.html#code-object.") | - clara::Opt{targets, "gfx803,gfx900,gfx906 etc."}["-t"]["--targets"]( + clara::Opt{targets, "gfx803,gfx900,gfx906,gfx908 etc."}["-t"]["--targets"]( "targets for which code objects are to be extracted from " "the fat binary; must be included in the set of processors " "with ROCm support from " diff --git a/lpl_ca/common.hpp b/lpl_ca/common.hpp index 7652f08c46..2622a8f76b 100644 --- a/lpl_ca/common.hpp +++ b/lpl_ca/common.hpp @@ -12,7 +12,7 @@ namespace hip_impl { inline const std::unordered_set& amdgpu_targets() { // The evolving list lives at: // https://www.llvm.org/docs/AMDGPUUsage.html#processors. static const std::unordered_set r{"gfx701", "gfx801", "gfx802", "gfx803", - "gfx900", "gfx906"}; + "gfx900", "gfx906", "gfx908"}; return r; } diff --git a/lpl_ca/lpl.hpp b/lpl_ca/lpl.hpp index 3eeb88bd22..941f30123a 100644 --- a/lpl_ca/lpl.hpp +++ b/lpl_ca/lpl.hpp @@ -132,7 +132,7 @@ inline clara::Parser cmdline_parser(bool& help, std::vector& source "file is documented at: https://reviews.llvm.org/D13909.") | clara::Arg{sources, "a.cpp b.cpp etc."}("inputs for compilation; must contain valid C++ code.") | - clara::Opt{targets, "gfx803,gfx900,gfx906 etc."}["-t"]["--targets"]( + clara::Opt{targets, "gfx803,gfx900,gfx906,gfx908 etc."}["-t"]["--targets"]( "targets for AMDGPU lowering; must be included in the set " "of processors with ROCm support from " "https://www.llvm.org/docs/AMDGPUUsage.html#processors."); diff --git a/tests/src/deviceLib/hipMathFunctions.cpp b/tests/src/deviceLib/hipMathFunctions.cpp index 6553094603..dc064da189 100644 --- a/tests/src/deviceLib/hipMathFunctions.cpp +++ b/tests/src/deviceLib/hipMathFunctions.cpp @@ -29,7 +29,7 @@ THE SOFTWARE. #include "hip/hip_runtime.h" #include "test_common.h" -#if __HIP_ARCH_GFX803__ || __HIP_ARCH_GFX900__ || __HIP_ARCH_GFX906__ +#if __HIP_ARCH_GFX803__ || __HIP_ARCH_GFX900__ || __HIP_ARCH_GFX906__ || __HIP_ARCH_GFX908__ __global__ void kernel_abs_int64(long long *input, long long *output) { int tx = threadIdx.x; diff --git a/tests/src/deviceLib/hipTestHalf.cpp b/tests/src/deviceLib/hipTestHalf.cpp index 96447960c1..d48ce9b4f6 100644 --- a/tests/src/deviceLib/hipTestHalf.cpp +++ b/tests/src/deviceLib/hipTestHalf.cpp @@ -28,7 +28,7 @@ THE SOFTWARE. #include "test_common.h" -#if __HIP_ARCH_GFX803__ || __HIP_ARCH_GFX900__ || __HIP_ARCH_GFX906__ +#if __HIP_ARCH_GFX803__ || __HIP_ARCH_GFX900__ || __HIP_ARCH_GFX906__ || __HIP_ARCH_GFX908__ __global__ void __halfMath(bool* result, __half a) { diff --git a/tests/src/deviceLib/hipTestNativeHalf.cpp b/tests/src/deviceLib/hipTestNativeHalf.cpp index 17aa54a4cf..55213d446d 100644 --- a/tests/src/deviceLib/hipTestNativeHalf.cpp +++ b/tests/src/deviceLib/hipTestNativeHalf.cpp @@ -32,7 +32,7 @@ THE SOFTWARE. using namespace std; -#if __HIP_ARCH_GFX803__ || __HIP_ARCH_GFX900__ || __HIP_ARCH_GFX906__ +#if __HIP_ARCH_GFX803__ || __HIP_ARCH_GFX900__ || __HIP_ARCH_GFX906__ || __HIP_ARCH_GFX908__ __global__ void __halfTest(bool* result, __half a) {