Merge pull request #444 from aaronenyeshi/vg20-initial
initial gfx906 support
[ROCm/hip commit: 67d45164fa]
This commit is contained in:
@@ -201,7 +201,7 @@ if(HIP_PLATFORM STREQUAL "hcc")
|
||||
|
||||
execute_process(COMMAND ${HCC_HOME}/bin/hcc-config --ldflags OUTPUT_VARIABLE HCC_LD_FLAGS)
|
||||
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${HCC_LD_FLAGS} -Wl,-Bsymbolic")
|
||||
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --amdgpu-target=gfx701 --amdgpu-target=gfx803 --amdgpu-target=gfx900")
|
||||
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --amdgpu-target=gfx701 --amdgpu-target=gfx803 --amdgpu-target=gfx900 --amdgpu-target=gfx906")
|
||||
if(COMPILE_HIP_ATP_MARKER)
|
||||
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -L/opt/rocm/profiler/CXLActivityLogger/bin/x86_64 -lCXLActivityLogger")
|
||||
endif()
|
||||
|
||||
@@ -75,6 +75,7 @@ $target_gfx801 = 0;
|
||||
$target_gfx802 = 0;
|
||||
$target_gfx803 = 0;
|
||||
$target_gfx900 = 0;
|
||||
$target_gfx906 = 0;
|
||||
$default_amdgpu_target = 1;
|
||||
|
||||
if ($HIP_PLATFORM eq "hcc") {
|
||||
@@ -281,6 +282,12 @@ foreach $arg (@ARGV)
|
||||
$target_gfx900 = 1;
|
||||
$default_amdgpu_target = 0;
|
||||
}
|
||||
if($arg eq '--amdgpu-target=gfx906')
|
||||
{
|
||||
$target_gfx906 = 1;
|
||||
$default_amdgpu_target = 0;
|
||||
}
|
||||
|
||||
|
||||
if(($trimarg eq '-stdlib=libstdc++') and ($setStdLib eq 0))
|
||||
{
|
||||
@@ -373,6 +380,11 @@ if($HIP_PLATFORM eq "hcc"){
|
||||
$target_gfx900 = 1;
|
||||
$default_amdgpu_target = 0;
|
||||
}
|
||||
if($target eq 'gfx906')
|
||||
{
|
||||
$target_gfx906 = 1;
|
||||
$default_amdgpu_target = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
# Else try using rocm_agent_enumerator
|
||||
@@ -404,6 +416,10 @@ if($HIP_PLATFORM eq "hcc"){
|
||||
$target_gfx900 = 1;
|
||||
$default_amdgpu_target = 0;
|
||||
}
|
||||
if($val eq "gfx906") {
|
||||
$target_gfx906 = 1;
|
||||
$default_amdgpu_target = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
# rocm_agent_enumerator failed! Throw an error and die if linking is required
|
||||
@@ -437,6 +453,11 @@ if($HIP_PLATFORM eq "hcc"){
|
||||
$HIPCXXFLAGS .= " -D__HIP_ARCH_GFX900__=1 ";
|
||||
$ENV{HCC_EXTRA_LIBRARIES_GFX900}="$HIP_PATH/lib/hip_hc_gfx803.ll\n";
|
||||
}
|
||||
if ($target_gfx906 eq 1) {
|
||||
$HIPLDFLAGS .= " --amdgpu-target=gfx906";
|
||||
$HIPCXXFLAGS .= " -D__HIP_ARCH_GFX906__=1 ";
|
||||
$ENV{HCC_EXTRA_LIBRARIES_GFX906}="$HIP_PATH/lib/hip_hc_gfx803.ll\n";
|
||||
}
|
||||
}
|
||||
|
||||
if ($hasC and $HIP_PLATFORM eq 'nvcc') {
|
||||
|
||||
@@ -23,7 +23,7 @@ inline clara::Parser cmdline_parser(bool& help, std::vector<std::string>& inputs
|
||||
"https://reviews.llvm.org/D13909; "
|
||||
"the code object format is documented at: "
|
||||
"https://www.llvm.org/docs/AMDGPUUsage.html#code-object.") |
|
||||
clara::Opt{targets, "gfx803,gfx900 etc."}["-t"]["--targets"](
|
||||
clara::Opt{targets, "gfx803,gfx900,gfx906 etc."}["-t"]["--targets"](
|
||||
"targets for which code objects are to be extracted from "
|
||||
"the fat binary; must be included in the set of processors "
|
||||
"with ROCm support from "
|
||||
@@ -76,4 +76,4 @@ inline void validate_inputs(const std::vector<std::string>& inputs) {
|
||||
throw std::runtime_error{"Non existent file " + *it + " passed as input."};
|
||||
}
|
||||
}
|
||||
} // namespace hip_impl
|
||||
} // namespace hip_impl
|
||||
|
||||
@@ -12,7 +12,7 @@ namespace hip_impl {
|
||||
inline const std::unordered_set<std::string>& amdgpu_targets() { // The evolving list lives at:
|
||||
// https://www.llvm.org/docs/AMDGPUUsage.html#processors.
|
||||
static const std::unordered_set<std::string> r{"gfx701", "gfx801", "gfx802", "gfx803",
|
||||
"gfx900"};
|
||||
"gfx900", "gfx906"};
|
||||
|
||||
return r;
|
||||
}
|
||||
@@ -77,4 +77,4 @@ inline void validate_targets(const std::vector<std::string>& x) {
|
||||
}
|
||||
}
|
||||
}
|
||||
} // Namespace hip_impl.
|
||||
} // Namespace hip_impl.
|
||||
|
||||
@@ -132,9 +132,9 @@ inline clara::Parser cmdline_parser(bool& help, std::vector<std::string>& source
|
||||
"file is documented at: https://reviews.llvm.org/D13909.") |
|
||||
clara::Arg{sources,
|
||||
"a.cpp b.cpp etc."}("inputs for compilation; must contain valid C++ code.") |
|
||||
clara::Opt{targets, "gfx803,gfx900 etc."}["-t"]["--targets"](
|
||||
clara::Opt{targets, "gfx803,gfx900,gfx906 etc."}["-t"]["--targets"](
|
||||
"targets for AMDGPU lowering; must be included in the set "
|
||||
"of processors with ROCm support from "
|
||||
"https://www.llvm.org/docs/AMDGPUUsage.html#processors.");
|
||||
}
|
||||
} // namespace hip_impl
|
||||
} // namespace hip_impl
|
||||
|
||||
@@ -29,7 +29,7 @@ THE SOFTWARE.
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "test_common.h"
|
||||
|
||||
#if __HIP_ARCH_GFX803__ || __HIP_ARCH_GFX900__
|
||||
#if __HIP_ARCH_GFX803__ || __HIP_ARCH_GFX900__ || __HIP_ARCH_GFX906__
|
||||
|
||||
__global__ void kernel_abs_int64(hipLaunchParm lp, long long *input, long long *output) {
|
||||
int tx = threadIdx.x;
|
||||
|
||||
@@ -32,7 +32,7 @@ THE SOFTWARE.
|
||||
#define HALF_SIZE 64 * sizeof(__half)
|
||||
#define HALF2_SIZE 64 * sizeof(__half2)
|
||||
|
||||
#if __HIP_ARCH_GFX803__ || __HIP_ARCH_GFX900__
|
||||
#if __HIP_ARCH_GFX803__ || __HIP_ARCH_GFX900__ || __HIP_ARCH_GFX906__
|
||||
|
||||
__global__ void __halfMath(hipLaunchParm lp, __half* A, __half* B, __half* C) {
|
||||
int tx = threadIdx.x;
|
||||
|
||||
Reference in new issue
Block a user