Merge pull request #444 from aaronenyeshi/vg20-initial

initial gfx906 support

[ROCm/hip commit: 67d45164fa]
Этот коммит содержится в:
Maneesh Gupta
2018-05-18 13:18:07 +05:30
коммит произвёл GitHub
родитель 183fce6294 9a591d6c26
Коммит 3da5fac84b
7 изменённых файлов: 30 добавлений и 9 удалений
+1 -1
Просмотреть файл
@@ -201,7 +201,7 @@ if(HIP_PLATFORM STREQUAL "hcc")
execute_process(COMMAND ${HCC_HOME}/bin/hcc-config --ldflags OUTPUT_VARIABLE HCC_LD_FLAGS)
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${HCC_LD_FLAGS} -Wl,-Bsymbolic")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --amdgpu-target=gfx701 --amdgpu-target=gfx803 --amdgpu-target=gfx900")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --amdgpu-target=gfx701 --amdgpu-target=gfx803 --amdgpu-target=gfx900 --amdgpu-target=gfx906")
# When COMPILE_HIP_ATP_MARKER is set, extend the shared-library linker flags
# with the CXLActivityLogger library directory and a link against
# CXLActivityLogger.
# NOTE(review): the library directory is a hardcoded path under /opt/rocm.
if(COMPILE_HIP_ATP_MARKER)
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -L/opt/rocm/profiler/CXLActivityLogger/bin/x86_64 -lCXLActivityLogger")
endif()
+21
Просмотреть файл
@@ -75,6 +75,7 @@ $target_gfx801 = 0;
$target_gfx802 = 0;
$target_gfx803 = 0;
$target_gfx900 = 0;
$target_gfx906 = 0;
$default_amdgpu_target = 1;
if ($HIP_PLATFORM eq "hcc") {
@@ -281,6 +282,12 @@ foreach $arg (@ARGV)
$target_gfx900 = 1;
$default_amdgpu_target = 0;
}
# An argument that is exactly '--amdgpu-target=gfx906' marks gfx906 as a
# requested target and clears the default-target flag.
if($arg eq '--amdgpu-target=gfx906')
{
$target_gfx906 = 1;
$default_amdgpu_target = 0;
}
if(($trimarg eq '-stdlib=libstdc++') and ($setStdLib eq 0))
{
@@ -373,6 +380,11 @@ if($HIP_PLATFORM eq "hcc"){
$target_gfx900 = 1;
$default_amdgpu_target = 0;
}
# $target equal to 'gfx906' marks gfx906 as a requested target and clears the
# default-target flag. (The origin of $target is outside this excerpt.)
if($target eq 'gfx906')
{
$target_gfx906 = 1;
$default_amdgpu_target = 0;
}
}
}
# Else try using rocm_agent_enumerator
@@ -404,6 +416,10 @@ if($HIP_PLATFORM eq "hcc"){
$target_gfx900 = 1;
$default_amdgpu_target = 0;
}
# $val equal to "gfx906" marks gfx906 as a requested target and clears the
# default-target flag.
# NOTE(review): $val presumably comes from rocm_agent_enumerator output, per
# the nearby comment -- confirm against the surrounding loop.
if($val eq "gfx906") {
$target_gfx906 = 1;
$default_amdgpu_target = 0;
}
}
}
# rocm_agent_enumerator failed! Throw an error and die if linking is required
@@ -437,6 +453,11 @@ if($HIP_PLATFORM eq "hcc"){
$HIPCXXFLAGS .= " -D__HIP_ARCH_GFX900__=1 ";
$ENV{HCC_EXTRA_LIBRARIES_GFX900}="$HIP_PATH/lib/hip_hc_gfx803.ll\n";
}
# If gfx906 was selected: append --amdgpu-target=gfx906 to the link flags,
# define __HIP_ARCH_GFX906__=1 in the C++ flags, and export
# HCC_EXTRA_LIBRARIES_GFX906 in the environment.
# NOTE(review): the exported path names hip_hc_gfx803.ll, the same file the
# gfx900 branch uses -- confirm this is intended for gfx906.
if ($target_gfx906 eq 1) {
$HIPLDFLAGS .= " --amdgpu-target=gfx906";
$HIPCXXFLAGS .= " -D__HIP_ARCH_GFX906__=1 ";
$ENV{HCC_EXTRA_LIBRARIES_GFX906}="$HIP_PATH/lib/hip_hc_gfx803.ll\n";
}
}
if ($hasC and $HIP_PLATFORM eq 'nvcc') {
+2 -2
Просмотреть файл
@@ -23,7 +23,7 @@ inline clara::Parser cmdline_parser(bool& help, std::vector<std::string>& inputs
"https://reviews.llvm.org/D13909; "
"the code object format is documented at: "
"https://www.llvm.org/docs/AMDGPUUsage.html#code-object.") |
clara::Opt{targets, "gfx803,gfx900 etc."}["-t"]["--targets"](
clara::Opt{targets, "gfx803,gfx900,gfx906 etc."}["-t"]["--targets"](
"targets for which code objects are to be extracted from "
"the fat binary; must be included in the set of processors "
"with ROCm support from "
@@ -76,4 +76,4 @@ inline void validate_inputs(const std::vector<std::string>& inputs) {
throw std::runtime_error{"Non existent file " + *it + " passed as input."};
}
}
} // namespace hip_impl
} // namespace hip_impl
+2 -2
Просмотреть файл
@@ -12,7 +12,7 @@ namespace hip_impl {
inline const std::unordered_set<std::string>& amdgpu_targets() { // The evolving list lives at:
// https://www.llvm.org/docs/AMDGPUUsage.html#processors.
static const std::unordered_set<std::string> r{"gfx701", "gfx801", "gfx802", "gfx803",
"gfx900"};
"gfx900", "gfx906"};
return r;
}
@@ -77,4 +77,4 @@ inline void validate_targets(const std::vector<std::string>& x) {
}
}
}
} // Namespace hip_impl.
} // Namespace hip_impl.
+2 -2
Просмотреть файл
@@ -132,9 +132,9 @@ inline clara::Parser cmdline_parser(bool& help, std::vector<std::string>& source
"file is documented at: https://reviews.llvm.org/D13909.") |
clara::Arg{sources,
"a.cpp b.cpp etc."}("inputs for compilation; must contain valid C++ code.") |
clara::Opt{targets, "gfx803,gfx900 etc."}["-t"]["--targets"](
clara::Opt{targets, "gfx803,gfx900,gfx906 etc."}["-t"]["--targets"](
"targets for AMDGPU lowering; must be included in the set "
"of processors with ROCm support from "
"https://www.llvm.org/docs/AMDGPUUsage.html#processors.");
}
} // namespace hip_impl
} // namespace hip_impl
+1 -1
Просмотреть файл
@@ -29,7 +29,7 @@ THE SOFTWARE.
#include "hip/hip_runtime.h"
#include "test_common.h"
#if __HIP_ARCH_GFX803__ || __HIP_ARCH_GFX900__
#if __HIP_ARCH_GFX803__ || __HIP_ARCH_GFX900__ || __HIP_ARCH_GFX906__
__global__ void kernel_abs_int64(hipLaunchParm lp, long long *input, long long *output) {
int tx = threadIdx.x;
+1 -1
Просмотреть файл
@@ -32,7 +32,7 @@ THE SOFTWARE.
#define HALF_SIZE 64 * sizeof(__half)
#define HALF2_SIZE 64 * sizeof(__half2)
#if __HIP_ARCH_GFX803__ || __HIP_ARCH_GFX900__
#if __HIP_ARCH_GFX803__ || __HIP_ARCH_GFX900__ || __HIP_ARCH_GFX906__
__global__ void __halfMath(hipLaunchParm lp, __half* A, __half* B, __half* C) {
int tx = threadIdx.x;