Merge branch 'amd-master-next' into amd-npi-next
Change-Id: Ie692324c8cf6215bf6cf87de0d5e3aa1bdfd5ea1
This commit is contained in:
@@ -10,7 +10,6 @@ bin/hipInfo
|
||||
bin/hipBusBandwidth
|
||||
bin/hipDispatchLatency
|
||||
bin/hipify-clang
|
||||
include/hip/hip_version.h
|
||||
tags
|
||||
samples/1_Utils/hipInfo/hipInfo
|
||||
samples/1_Utils/hipBusBandwidth/hipBusBandwidth
|
||||
|
||||
@@ -234,7 +234,7 @@ endif (NOT CPACK_SET_DESTDIR)
|
||||
# Generate profiling API macros/structures header
|
||||
if(HIP_PLATFORM STREQUAL "hcc")
|
||||
if(USE_PROF_API EQUAL 1)
|
||||
set(PROF_API_STR "${CMAKE_CURRENT_SOURCE_DIR}/include/hip/hcc_detail/hip_prof_str.h")
|
||||
set(PROF_API_STR "${PROJECT_BINARY_DIR}/include/hip/hcc_detail/hip_prof_str.h")
|
||||
set(PROF_API_HDR "${CMAKE_CURRENT_SOURCE_DIR}/include/hip/hcc_detail/hip_runtime_api.h")
|
||||
set(PROF_API_SRC "${CMAKE_CURRENT_SOURCE_DIR}/src")
|
||||
set(PROF_API_GEN "${CMAKE_CURRENT_SOURCE_DIR}/hip_prof_gen.py")
|
||||
@@ -315,6 +315,7 @@ endif()
|
||||
message(STATUS "\nHSA runtime in: " ${HSA_PATH})
|
||||
# Build hip_hcc if platform is hcc
|
||||
if(HIP_PLATFORM STREQUAL "hcc")
|
||||
include_directories(${PROJECT_BINARY_DIR}/include)
|
||||
include_directories(${PROJECT_SOURCE_DIR}/include)
|
||||
set(HIP_HCC_BUILD_FLAGS)
|
||||
|
||||
@@ -425,7 +426,8 @@ set(_versionInfoHeader
|
||||
#define HIP_VERSION (HIP_VERSION_MAJOR * 100 + HIP_VERSION_MINOR)\n
|
||||
#endif\n
|
||||
")
|
||||
file(WRITE "${CMAKE_CURRENT_SOURCE_DIR}/include/hip/hip_version.h" ${_versionInfoHeader})
|
||||
file(WRITE "${PROJECT_BINARY_DIR}/include/hip/hip_version.h" ${_versionInfoHeader})
|
||||
include_directories(${PROJECT_BINARY_DIR}/include)
|
||||
|
||||
# Build doxygen documentation
|
||||
find_program(DOXYGEN_EXE doxygen)
|
||||
@@ -457,7 +459,11 @@ install(FILES ${PROJECT_BINARY_DIR}/.hipVersion DESTINATION bin)
|
||||
execute_process(COMMAND test ${CMAKE_INSTALL_PREFIX} -ef ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
RESULT_VARIABLE INSTALL_SOURCE)
|
||||
if(NOT ${INSTALL_SOURCE} EQUAL 0)
|
||||
install(DIRECTORY src DESTINATION .)
|
||||
if(HIP_RUNTIME STREQUAL "HCC")
|
||||
install(DIRECTORY src DESTINATION .)
|
||||
elseif(HIP_RUNTIME STREQUAL "ROCclr")
|
||||
install(DIRECTORY rocclr DESTINATION .)
|
||||
endif()
|
||||
install(DIRECTORY bin DESTINATION . USE_SOURCE_PERMISSIONS)
|
||||
install(DIRECTORY include DESTINATION .)
|
||||
install(DIRECTORY cmake DESTINATION .)
|
||||
@@ -468,7 +474,9 @@ endif()
|
||||
install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/hip/hcc_detail
|
||||
DESTINATION include/hip
|
||||
FILES_MATCHING PATTERN "*.h*")
|
||||
|
||||
install(DIRECTORY ${PROJECT_BINARY_DIR}/include/hip
|
||||
DESTINATION include
|
||||
FILES_MATCHING PATTERN "*.h*")
|
||||
|
||||
#############################
|
||||
# hip-config
|
||||
|
||||
@@ -34,26 +34,35 @@ defined $options{i} || die("input not specified");
|
||||
$input_file = $options{i};
|
||||
(-f $input_file) || die("can't find $input_file");
|
||||
|
||||
# derive HIP_PATH via env var or use parent directory of extractkernel
|
||||
my $HIP_PATH=$ENV{'HIP_PATH'} // dirname(Cwd::abs_path("$0/../"));
|
||||
my $HIP_COMPILER = `$HIP_PATH/bin/hipconfig --compiler`;
|
||||
my $ROCM_PATH = `$HIP_PATH/bin/hipconfig --rocmpath`;
|
||||
my $HIP_CLANG_PATH = `$HIP_PATH/bin/hipconfig --hipclangpath`;
|
||||
|
||||
# look for llvm-objdump and clang-offload-bundler
|
||||
my $tools_path_prefix;
|
||||
my $llvm_objdump;
|
||||
my $clang_offload_bundler;
|
||||
|
||||
if (defined $ENV{'HCC_HOME'}) {
|
||||
$tools_path_prefix = File::Spec->catfile($ENV{'HCC_HOME'}, "bin");
|
||||
$llvm_objdump = File::Spec->catfile($tools_path_prefix, "llvm-objdump");
|
||||
$clang_offload_bundler = File::Spec->catfile($tools_path_prefix, "clang-offload-bundler");
|
||||
if (defined $HIP_COMPILER and $HIP_COMPILER eq "clang"){
|
||||
# Search the path with respect to HIP_CLANG_PATH
|
||||
$tools_path_prefix = $HIP_CLANG_PATH;
|
||||
}
|
||||
else {
|
||||
$tools_path_prefix = dirname(realpath($0));
|
||||
$llvm_objdump = File::Spec->catfile($tools_path_prefix, "llvm-objdump");
|
||||
$clang_offload_bundler = File::Spec->catfile($tools_path_prefix, "clang-offload-bundler");
|
||||
if (!(-f $llvm_objdump)) {
|
||||
$tools_path_prefix = realpath($tools_path_prefix."/../../hcc/bin");
|
||||
$llvm_objdump = File::Spec->catfile($tools_path_prefix, "llvm-objdump");
|
||||
$clang_offload_bundler = File::Spec->catfile($tools_path_prefix, "clang-offload-bundler");
|
||||
if (defined $HIP_COMPILER and $HIP_COMPILER eq "hcc") {
|
||||
# Search the path with respect to HCC_HOME if it is set, else search in ROCM_PATH
|
||||
if (defined $ENV{'HCC_HOME'}) {
|
||||
$tools_path_prefix = File::Spec->catfile($ENV{'HCC_HOME'}, "bin");
|
||||
}
|
||||
else {
|
||||
$tools_path_prefix = realpath($ROCM_PATH."/hcc/bin");
|
||||
}
|
||||
}
|
||||
}
|
||||
# Find llvm-objdump and clang-offload-bundler in the path set above
|
||||
$llvm_objdump = File::Spec->catfile($tools_path_prefix, "llvm-objdump");
|
||||
$clang_offload_bundler = File::Spec->catfile($tools_path_prefix, "clang-offload-bundler");
|
||||
|
||||
if (!(-f $llvm_objdump)) {
|
||||
$llvm_objdump = which("llvm-objdump");
|
||||
|
||||
+45
-22
@@ -26,7 +26,7 @@ use Cwd 'abs_path';
|
||||
# script's abs_path). Used on AMD platforms only.
|
||||
# HSA_PATH : Path to HSA dir (defaults to ../../hsa relative to abs_path
|
||||
# of this script). Used on AMD platforms only.
|
||||
# HIP_ROCclr_HOME : Path to HIP/ROCclr directory. Used on AMD platforms only.
|
||||
# HIP_ROCCLR_HOME : Path to HIP/ROCclr directory. Used on AMD platforms only.
|
||||
# HIP_CLANG_PATH : Path to HIP-Clang (default to ../../llvm/bin relative to this
|
||||
# script's abs_path). Used on AMD platforms only.
|
||||
|
||||
@@ -82,15 +82,15 @@ if (-e "$HIP_PATH/../.info/version") {
|
||||
} else {
|
||||
$ROCM_PATH=$ENV{'ROCM_PATH'} // "/opt/rocm";
|
||||
}
|
||||
$HIP_ROCclr_HOME=$ENV{'HIP_ROCclr_HOME'};
|
||||
$HIP_ROCCLR_HOME=$ENV{'HIP_ROCCLR_HOME'};
|
||||
$HIP_LIB_PATH=$ENV{'HIP_LIB_PATH'};
|
||||
$HIP_CLANG_PATH=$ENV{'HIP_CLANG_PATH'};
|
||||
$DEVICE_LIB_PATH=$ENV{'DEVICE_LIB_PATH'};
|
||||
$HIP_CLANG_HCC_COMPAT_MODE=$ENV{'HIP_CLANG_HCC_COMPAT_MODE'}; # HCC compatibility mode
|
||||
$HIP_COMPILE_CXX_AS_HIP=$ENV{'HIP_COMPILE_CXX_AS_HIP'} // "1";
|
||||
|
||||
if (defined $HIP_ROCclr_HOME) {
|
||||
$HIP_INFO_PATH= "$HIP_ROCclr_HOME/lib/.hipInfo";
|
||||
if (defined $HIP_ROCCLR_HOME) {
|
||||
$HIP_INFO_PATH= "$HIP_ROCCLR_HOME/lib/.hipInfo";
|
||||
} else {
|
||||
$HIP_INFO_PATH= "$HIP_PATH/lib/.hipInfo"; # use actual file
|
||||
}
|
||||
@@ -123,35 +123,43 @@ sub delete_temp_dirs {
|
||||
|
||||
#---
|
||||
#HIP_PLATFORM controls whether to use hcc (AMD) or nvcc as the platform:
|
||||
$HIP_PLATFORM= `$HIP_PATH/bin/hipconfig --platform` // "hcc";
|
||||
$HIP_VERSION= `$HIP_PATH/bin/hipconfig --version`;
|
||||
#HIP_COMPILER controls whether to use hcc, clang or nvcc for compilation:
|
||||
$HIP_COMPILER= `$HIP_PATH/bin/hipconfig --compiler`;
|
||||
#HIP_RUNTIME controls whether to use HCC, ROCclr, or NVCC as the runtime:
|
||||
$HIP_RUNTIME= `$HIP_PATH/bin/hipconfig --runtime`;
|
||||
if ($isWindows) {
|
||||
# Windows cannot run perl natively, so hipcc will explicitly call perl
|
||||
$HIP_PLATFORM= `perl $HIP_PATH/bin/hipconfig --platform`;
|
||||
$HIP_VERSION= `perl $HIP_PATH/bin/hipconfig --version`;
|
||||
$HIP_COMPILER= `perl $HIP_PATH/bin/hipconfig --compiler`;
|
||||
$HIP_RUNTIME= `perl $HIP_PATH/bin/hipconfig --runtime`;
|
||||
} else {
|
||||
$HIP_PLATFORM= `$HIP_PATH/bin/hipconfig --platform`;
|
||||
$HIP_VERSION= `$HIP_PATH/bin/hipconfig --version`;
|
||||
$HIP_COMPILER= `$HIP_PATH/bin/hipconfig --compiler`;
|
||||
$HIP_RUNTIME= `$HIP_PATH/bin/hipconfig --runtime`;
|
||||
}
|
||||
|
||||
# If using ROCclr runtime, need to find HIP_ROCclr_HOME
|
||||
if (defined $HIP_RUNTIME and $HIP_RUNTIME eq "ROCclr" and !defined $HIP_ROCclr_HOME) {
|
||||
# If using ROCclr runtime, need to find HIP_ROCCLR_HOME
|
||||
if (defined $HIP_RUNTIME and $HIP_RUNTIME eq "ROCclr" and !defined $HIP_ROCCLR_HOME) {
|
||||
my $hipcc_dir = dirname($0);
|
||||
if (-e "$hipcc_dir/../lib/bitcode") {
|
||||
$HIP_ROCclr_HOME = abs_path($hipcc_dir . "/..");
|
||||
$HIP_ROCCLR_HOME = abs_path($hipcc_dir . "/..");
|
||||
} else {
|
||||
$HIP_ROCclr_HOME = $HIP_PATH; # use HIP_PATH
|
||||
$HIP_ROCCLR_HOME = $HIP_PATH; # use HIP_PATH
|
||||
}
|
||||
$HIPCXXFLAGS .= "-D__HIP_ROCclr__";
|
||||
$HIPCFLAGS .= "-D__HIP_ROCclr__";
|
||||
}
|
||||
|
||||
if (defined $HIP_ROCclr_HOME) {
|
||||
if (!defined $HIP_CLANG_PATH and (-e "$HIP_ROCclr_HOME/bin/clang" or -e "$HIP_ROCclr_HOME/bin/clang.exe")) {
|
||||
$HIP_CLANG_PATH = "$HIP_ROCclr_HOME/bin";
|
||||
if (defined $HIP_ROCCLR_HOME) {
|
||||
if (!defined $HIP_CLANG_PATH and (-e "$HIP_ROCCLR_HOME/bin/clang" or -e "$HIP_ROCCLR_HOME/bin/clang.exe")) {
|
||||
$HIP_CLANG_PATH = "$HIP_ROCCLR_HOME/bin";
|
||||
}
|
||||
if (!defined $DEVICE_LIB_PATH and -e "$HIP_ROCclr_HOME/lib/bitcode") {
|
||||
$DEVICE_LIB_PATH = "$HIP_ROCclr_HOME/lib/bitcode";
|
||||
if (!defined $DEVICE_LIB_PATH and -e "$HIP_ROCCLR_HOME/lib/bitcode") {
|
||||
$DEVICE_LIB_PATH = "$HIP_ROCCLR_HOME/lib/bitcode";
|
||||
}
|
||||
$HIP_INCLUDE_PATH = "$HIP_ROCclr_HOME/include";
|
||||
$HIP_INCLUDE_PATH = "$HIP_ROCCLR_HOME/include";
|
||||
if (!defined $HIP_LIB_PATH) {
|
||||
$HIP_LIB_PATH = "$HIP_ROCclr_HOME/lib";
|
||||
$HIP_LIB_PATH = "$HIP_ROCCLR_HOME/lib";
|
||||
}
|
||||
}
|
||||
|
||||
@@ -199,8 +207,8 @@ if ($HIP_PLATFORM eq "hcc" and $HIP_COMPILER eq "clang") {
|
||||
$HIP_LIB_PATH = "$HIP_PATH/lib";
|
||||
}
|
||||
if ($verbose & 0x2) {
|
||||
if (defined $HIP_ROCclr_HOME) {
|
||||
print ("HIP_ROCclr_HOME=$HIP_ROCclr_HOME\n");
|
||||
if (defined $HIP_ROCCLR_HOME) {
|
||||
print ("HIP_ROCCLR_HOME=$HIP_ROCCLR_HOME\n");
|
||||
}
|
||||
print ("HIP_CLANG_PATH=$HIP_CLANG_PATH\n");
|
||||
print ("HIP_CLANG_INCLUDE_PATH=$HIP_CLANG_INCLUDE_PATH\n");
|
||||
@@ -393,6 +401,7 @@ if($HIP_PLATFORM eq "nvcc"){
|
||||
}
|
||||
}
|
||||
|
||||
# TODO: convert toolArgs to an array rather than a string
|
||||
my $toolArgs = ""; # arguments to pass to the hcc or nvcc tool
|
||||
my $optArg = ""; # -O args
|
||||
my $targetOpt = '--amdgpu-target=';
|
||||
@@ -402,7 +411,11 @@ my $prevArg = ""; # previous argument
|
||||
|
||||
foreach $arg (@ARGV)
|
||||
{
|
||||
# Save $arg, it can get changed in the loop.
|
||||
$trimarg = $arg;
|
||||
# TODO: figure out why this space removal is wanted.
|
||||
# TODO: If someone has gone to the effort of quoting the spaces to the shell
|
||||
# TODO: why are we removing it here?
|
||||
$trimarg =~ s/^\s+|\s+$//g; # Remive whitespace
|
||||
my $swallowArg = 0;
|
||||
if ($arg eq '-c' or $arg eq '--genco' or $arg eq '-E') {
|
||||
@@ -411,6 +424,7 @@ foreach $arg (@ARGV)
|
||||
}
|
||||
|
||||
if ($skipOutputFile) {
|
||||
# TODO: handle filename with shell metacharacters
|
||||
$toolArgs .= " $arg";
|
||||
$prevArg = $arg;
|
||||
$skipOutputFile = 0;
|
||||
@@ -452,7 +466,7 @@ foreach $arg (@ARGV)
|
||||
$arg = "--cuda-device-only";
|
||||
}
|
||||
|
||||
if(($trimarg eq '-stdlib=libstdc++') and ($setStdLib eq 0))
|
||||
if(($trimarg eq '-stdlib=libstdc++') and ($setStdLib eq 0) and $HIP_PLATFORM eq 'hcc' and $HIP_COMPILER eq 'hcc')
|
||||
{
|
||||
$HIPCXXFLAGS .= $HCC_WA_FLAGS;
|
||||
$setStdLib = 1;
|
||||
@@ -659,6 +673,14 @@ foreach $arg (@ARGV)
|
||||
push (@inputs, $arg);
|
||||
#print "I: <$arg>\n";
|
||||
}
|
||||
# Produce a version of $arg where characters significant to the shell are
|
||||
# quoted. One could quote everything of course but don't bother for
|
||||
# common characters such as alphanumerics.
|
||||
# Do the quoting here because sometimes the $arg is changed in the loop
|
||||
# Important to have all of '-Xlinker' in the set of unquoted characters.
|
||||
if (not $isWindows) { # Windows needs different quoting, ignore for now
|
||||
$arg =~ s/[^-a-zA-Z0-9_=+,.\/]/\\$&/g;
|
||||
}
|
||||
$toolArgs .= " $arg" unless $swallowArg;
|
||||
$prevArg = $arg;
|
||||
}
|
||||
@@ -805,6 +827,7 @@ if ($HIPCC_LINK_FLAGS_APPEND) {
|
||||
$HIPLDFLAGS .= " $HIPCC_LINK_FLAGS_APPEND";
|
||||
}
|
||||
|
||||
# TODO: convert CMD to an array rather than a string
|
||||
my $CMD="$HIPCC";
|
||||
|
||||
if ($needCFLAGS) {
|
||||
|
||||
@@ -17,9 +17,11 @@ Getopt::Long::Configure ( qw{bundling no_ignore_case});
|
||||
GetOptions(
|
||||
"help|h" => \$p_help
|
||||
,"path|p" => \$p_path
|
||||
,"rocmpath|R" => \$p_rocmpath
|
||||
,"compiler|c" => \$p_compiler
|
||||
,"platform|P" => \$p_platform
|
||||
,"runtime|r" => \$p_runtime
|
||||
,"hipclangpath|l" => \$p_hipclangpath
|
||||
,"cpp_config|cxx_config|C" => \$p_cpp_config
|
||||
,"full|f|info" => \$p_full,
|
||||
,"version|v" => \$p_version,
|
||||
@@ -30,10 +32,12 @@ GetOptions(
|
||||
if ($p_help) {
|
||||
print "usage: hipconfig [OPTIONS]\n";
|
||||
print " --path, -p : print HIP_PATH (use env var if set, else determine from hipconfig path)\n";
|
||||
print " --rocmpath, -R : print ROCM_PATH (use env var if set, else determine from hip path or /opt/rocm)\n";
|
||||
print " --cpp_config, -C : print C++ compiler options\n";
|
||||
print " --compiler, -c : print compiler (hcc or clang or nvcc)\n";
|
||||
print " --platform, -P : print platform (hcc or nvcc)\n";
|
||||
print " --runtime, -r : print runtime (HCC or ROCclr)\n";
|
||||
print " --hipclangpath, -l : print HIP_CLANG_PATH\n";
|
||||
print " --full, -f : print full config\n";
|
||||
print " --version, -v : print hip version\n";
|
||||
print " --check : check configuration\n";
|
||||
@@ -85,15 +89,33 @@ $CUDA_PATH=$ENV{'CUDA_PATH'} // '/usr/local/cuda';
|
||||
$HCC_HOME=$ENV{'HCC_HOME'} // "$ROCM_PATH/hcc";
|
||||
$HSA_PATH=$ENV{'HSA_PATH'} // "$ROCM_PATH/hsa";
|
||||
$HIP_CLANG_PATH=$ENV{'HIP_CLANG_PATH'} // "$ROCM_PATH/llvm/bin";
|
||||
# HIP_ROCCLR_HOME is used by Windows builds
|
||||
$HIP_ROCCLR_HOME=$ENV{'HIP_ROCCLR_HOME'};
|
||||
|
||||
if (defined $HIP_ROCCLR_HOME) {
|
||||
$HIP_INFO_PATH= "$HIP_ROCCLR_HOME/lib/.hipInfo";
|
||||
} else {
|
||||
$HIP_INFO_PATH= "$HIP_PATH/lib/.hipInfo"; # use actual file
|
||||
}
|
||||
#---
|
||||
#HIP_PLATFORM controls whether to use NVCC or HCC for compilation:
|
||||
$HIP_PLATFORM=$ENV{'HIP_PLATFORM'};
|
||||
# Read .hipInfo
|
||||
my %hipInfo = ();
|
||||
parse_config_file("$HIP_PATH/lib/.hipInfo", \%hipInfo);
|
||||
$HIP_COMPILER = $hipInfo{'HIP_COMPILER'} // "hcc";
|
||||
$HIP_RUNTIME = $hipInfo{'HIP_RUNTIME'} // "HCC";
|
||||
parse_config_file("$HIP_INFO_PATH", \%hipInfo);
|
||||
# Prioritize Env first, otherwise use the hipInfo config file
|
||||
$HIP_COMPILER = $ENV{'HIP_COMPILER'} // $hipInfo{'HIP_COMPILER'} // "hcc";
|
||||
$HIP_RUNTIME = $ENV{'HIP_RUNTIME'} // $hipInfo{'HIP_RUNTIME'} // "HCC";
|
||||
|
||||
# If using ROCclr runtime, need to find HIP_ROCCLR_HOME
|
||||
if (defined $HIP_RUNTIME and $HIP_RUNTIME eq "ROCclr" and !defined $HIP_ROCCLR_HOME) {
|
||||
my $hipconfig_dir = dirname($0);
|
||||
if (-e "$hipconfig_dir/../lib/bitcode") {
|
||||
$HIP_ROCCLR_HOME = abs_path($hipconfig_dir . "/..");
|
||||
} else {
|
||||
$HIP_ROCCLR_HOME = $HIP_PATH; # use HIP_PATH
|
||||
}
|
||||
}
|
||||
|
||||
if (not defined $HIP_PLATFORM) {
|
||||
if (can_run("$HCC_HOME/bin/hcc") or can_run("hcc")) {
|
||||
@@ -112,6 +134,10 @@ if ($HIP_COMPILER eq "hcc") {
|
||||
$CPP_CONFIG = " -D__HIP_PLATFORM_HCC__= -I$HIP_PATH/include -I$HCC_HOME/include -I$HSA_PATH/include";
|
||||
}
|
||||
if ($HIP_COMPILER eq "clang") {
|
||||
# Windows does not have clang at linux default path
|
||||
if (defined $HIP_ROCCLR_HOME and (-e "$HIP_ROCCLR_HOME/bin/clang" or -e "$HIP_ROCCLR_HOME/bin/clang.exe")) {
|
||||
$HIP_CLANG_PATH = "$HIP_ROCCLR_HOME/bin";
|
||||
}
|
||||
$HIP_CLANG_VERSION = `$HIP_CLANG_PATH/clang++ --version`;
|
||||
$HIP_CLANG_VERSION=~/.*clang version ([^ ]+).*/;
|
||||
$HIP_CLANG_VERSION=$1;
|
||||
@@ -139,6 +165,11 @@ if ($p_path) {
|
||||
$printed = 1;
|
||||
}
|
||||
|
||||
if ($p_rocmpath) {
|
||||
print "$ROCM_PATH";
|
||||
$printed = 1;
|
||||
}
|
||||
|
||||
if ($p_cpp_config) {
|
||||
print $CPP_CONFIG;
|
||||
$printed = 1;
|
||||
@@ -159,6 +190,13 @@ if ($p_runtime) {
|
||||
$printed = 1;
|
||||
}
|
||||
|
||||
if ($p_hipclangpath) {
|
||||
if (defined $HIP_CLANG_PATH) {
|
||||
print $HIP_CLANG_PATH;
|
||||
}
|
||||
$printed = 1;
|
||||
}
|
||||
|
||||
if ($p_version) {
|
||||
print $HIP_VERSION;
|
||||
$printed = 1;
|
||||
@@ -168,6 +206,7 @@ if (!$printed or $p_full) {
|
||||
print "HIP version : ", $HIP_VERSION, "\n\n";
|
||||
print "== hipconfig\n";
|
||||
print "HIP_PATH : ", $HIP_PATH, "\n";
|
||||
print "ROCM_PATH : ", $ROCM_PATH, "\n";
|
||||
print "HIP_COMPILER : ", $HIP_COMPILER, "\n";
|
||||
print "HIP_PLATFORM : ", $HIP_PLATFORM, "\n";
|
||||
print "HIP_RUNTIME : ", $HIP_RUNTIME, "\n";
|
||||
|
||||
@@ -615,23 +615,25 @@ macro(HIP_ADD_EXECUTABLE hip_target)
|
||||
endif()
|
||||
if("${HIP_COMPILER}" STREQUAL "hcc")
|
||||
if("x${HCC_HOME}" STREQUAL "x")
|
||||
if (DEFINED $ENV{ROCM_PATH})
|
||||
set(HCC_HOME "$ENV{ROCM_PATH}/hcc")
|
||||
elseif( DEFINED $ENV{HIP_PATH})
|
||||
set(HCC_HOME "$ENV{HIP_PATH}/../hcc")
|
||||
if (DEFINED ENV{ROCM_PATH})
|
||||
set(HCC_HOME "$ENV{ROCM_PATH}/hcc")
|
||||
elseif(DEFINED ENV{HIP_PATH})
|
||||
set(HCC_HOME "$ENV{HIP_PATH}/../hcc")
|
||||
else()
|
||||
set(HCC_HOME "/opt/rocm/hcc")
|
||||
set(HCC_HOME "/opt/rocm/hcc")
|
||||
endif()
|
||||
endif()
|
||||
set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_HOME} <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>")
|
||||
elseif("${HIP_COMPILER}" STREQUAL "clang")
|
||||
if("x${HIP_CLANG_PATH}" STREQUAL "x")
|
||||
if (DEFINED $ENV{ROCM_PATH})
|
||||
set(HIP_CLANG_PATH "$ENV{ROCM_PATH}/llvm/bin")
|
||||
elseif( DEFINED $ENV{HIP_PATH})
|
||||
set(HIP_CLANG_PATH "$ENV{HIP_PATH}/../llvm/bin")
|
||||
if("x${HIP_CLANG_PATH}" STREQUAL "x")
|
||||
if(DEFINED ENV{HIP_CLANG_PATH})
|
||||
set(HIP_CLANG_PATH $ENV{HIP_CLANG_PATH})
|
||||
elseif(DEFINED ENV{ROCM_PATH})
|
||||
set(HIP_CLANG_PATH "$ENV{ROCM_PATH}/llvm/bin")
|
||||
elseif(DEFINED ENV{HIP_PATH})
|
||||
set(HIP_CLANG_PATH "$ENV{HIP_PATH}/../llvm/bin")
|
||||
else()
|
||||
set(HIP_CLANG_PATH "/opt/rocm/llvm/bin")
|
||||
set(HIP_CLANG_PATH "/opt/rocm/llvm/bin")
|
||||
endif()
|
||||
endif()
|
||||
set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HIP_CLANG_PATH} ${HIP_CLANG_PARALLEL_BUILD_LINK_OPTIONS} <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>")
|
||||
|
||||
@@ -85,11 +85,11 @@ __device__ static inline unsigned int __ffsll(long long int input) {
|
||||
}
|
||||
|
||||
__device__ static inline unsigned int __brev(unsigned int input) {
|
||||
return __llvm_bitrev_b32(input);
|
||||
return __builtin_bitreverse32(input);
|
||||
}
|
||||
|
||||
__device__ static inline unsigned long long int __brevll(unsigned long long int input) {
|
||||
return __llvm_bitrev_b64(input);
|
||||
return __builtin_bitreverse64(input);
|
||||
}
|
||||
|
||||
__device__ static inline unsigned int __lastbit_u32_u64(uint64_t input) {
|
||||
@@ -233,7 +233,10 @@ __device__ static inline unsigned int __usad(unsigned int x, unsigned int y, uns
|
||||
return __ockl_sadd_u32(x, y, z);
|
||||
}
|
||||
|
||||
__device__ static inline unsigned int __lane_id() { return __mbcnt_hi(-1, __mbcnt_lo(-1, 0)); }
|
||||
__device__ static inline unsigned int __lane_id() {
|
||||
return __builtin_amdgcn_mbcnt_hi(
|
||||
-1, __builtin_amdgcn_mbcnt_lo(-1, 0));
|
||||
}
|
||||
|
||||
/*
|
||||
HIP specific device functions
|
||||
@@ -241,25 +244,25 @@ HIP specific device functions
|
||||
|
||||
__device__ static inline unsigned __hip_ds_bpermute(int index, unsigned src) {
|
||||
union { int i; unsigned u; float f; } tmp; tmp.u = src;
|
||||
tmp.i = __llvm_amdgcn_ds_bpermute(index, tmp.i);
|
||||
tmp.i = __builtin_amdgcn_ds_bpermute(index, tmp.i);
|
||||
return tmp.u;
|
||||
}
|
||||
|
||||
__device__ static inline float __hip_ds_bpermutef(int index, float src) {
|
||||
union { int i; unsigned u; float f; } tmp; tmp.f = src;
|
||||
tmp.i = __llvm_amdgcn_ds_bpermute(index, tmp.i);
|
||||
tmp.i = __builtin_amdgcn_ds_bpermute(index, tmp.i);
|
||||
return tmp.f;
|
||||
}
|
||||
|
||||
__device__ static inline unsigned __hip_ds_permute(int index, unsigned src) {
|
||||
union { int i; unsigned u; float f; } tmp; tmp.u = src;
|
||||
tmp.i = __llvm_amdgcn_ds_permute(index, tmp.i);
|
||||
tmp.i = __builtin_amdgcn_ds_permute(index, tmp.i);
|
||||
return tmp.u;
|
||||
}
|
||||
|
||||
__device__ static inline float __hip_ds_permutef(int index, float src) {
|
||||
union { int i; unsigned u; float f; } tmp; tmp.u = src;
|
||||
tmp.i = __llvm_amdgcn_ds_permute(index, tmp.i);
|
||||
tmp.i = __builtin_amdgcn_ds_permute(index, tmp.i);
|
||||
return tmp.u;
|
||||
}
|
||||
|
||||
@@ -293,8 +296,8 @@ __device__ static inline float __hip_ds_swizzlef_N(float src) {
|
||||
|
||||
template <int dpp_ctrl, int row_mask, int bank_mask, bool bound_ctrl>
|
||||
__device__ static inline int __hip_move_dpp_N(int src) {
|
||||
return __llvm_amdgcn_move_dpp(src, dpp_ctrl, row_mask, bank_mask,
|
||||
bound_ctrl);
|
||||
return __builtin_amdgcn_mov_dpp(src, dpp_ctrl, row_mask, bank_mask,
|
||||
bound_ctrl);
|
||||
}
|
||||
|
||||
static constexpr int warpSize = 64;
|
||||
@@ -304,7 +307,7 @@ inline
|
||||
int __shfl(int var, int src_lane, int width = warpSize) {
|
||||
int self = __lane_id();
|
||||
int index = src_lane + (self & ~(width-1));
|
||||
return __llvm_amdgcn_ds_bpermute(index<<2, var);
|
||||
return __builtin_amdgcn_ds_bpermute(index<<2, var);
|
||||
}
|
||||
__device__
|
||||
inline
|
||||
@@ -376,7 +379,7 @@ int __shfl_up(int var, unsigned int lane_delta, int width = warpSize) {
|
||||
int self = __lane_id();
|
||||
int index = self - lane_delta;
|
||||
index = (index < (self & ~(width-1)))?self:index;
|
||||
return __llvm_amdgcn_ds_bpermute(index<<2, var);
|
||||
return __builtin_amdgcn_ds_bpermute(index<<2, var);
|
||||
}
|
||||
__device__
|
||||
inline
|
||||
@@ -446,7 +449,7 @@ int __shfl_down(int var, unsigned int lane_delta, int width = warpSize) {
|
||||
int self = __lane_id();
|
||||
int index = self + lane_delta;
|
||||
index = (int)((self&(width-1))+lane_delta) >= width?self:index;
|
||||
return __llvm_amdgcn_ds_bpermute(index<<2, var);
|
||||
return __builtin_amdgcn_ds_bpermute(index<<2, var);
|
||||
}
|
||||
__device__
|
||||
inline
|
||||
@@ -516,7 +519,7 @@ int __shfl_xor(int var, int lane_mask, int width = warpSize) {
|
||||
int self = __lane_id();
|
||||
int index = self^lane_mask;
|
||||
index = index >= ((self+width)&~(width-1))?self:index;
|
||||
return __llvm_amdgcn_ds_bpermute(index<<2, var);
|
||||
return __builtin_amdgcn_ds_bpermute(index<<2, var);
|
||||
}
|
||||
__device__
|
||||
inline
|
||||
|
||||
@@ -506,6 +506,14 @@ hipError_t hipFuncSetCacheConfig(const void* func, hipFuncCache_t config);
|
||||
*/
|
||||
hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig* pConfig);
|
||||
|
||||
/**
|
||||
* @brief Gets the flags set for current device
|
||||
*
|
||||
* @param [out] flags
|
||||
*
|
||||
* @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue
|
||||
*/
|
||||
hipError_t hipGetDeviceFlags(unsigned *flags);
|
||||
|
||||
/**
|
||||
* @brief The bank width of shared memory on current device is set
|
||||
|
||||
@@ -31,40 +31,11 @@ THE SOFTWARE.
|
||||
|
||||
#include "hip/hcc_detail/host_defines.h"
|
||||
|
||||
__device__
|
||||
__attribute__((convergent))
|
||||
ulong __llvm_amdgcn_icmp_i32(uint x, uint y, uint z) __asm("llvm.amdgcn.icmp.i32");
|
||||
|
||||
// FIXME: These should all be removed and proper builtins used.
|
||||
__device__
|
||||
unsigned __llvm_amdgcn_groupstaticsize() __asm("llvm.amdgcn.groupstaticsize");
|
||||
|
||||
__device__
|
||||
unsigned int __llvm_bitrev_b32(unsigned int src0) __asm("llvm.bitreverse.i32");
|
||||
|
||||
__device__
|
||||
uint64_t __llvm_bitrev_b64(uint64_t src0) __asm("llvm.bitreverse.i64");
|
||||
|
||||
extern
|
||||
__device__
|
||||
__attribute__((const))
|
||||
unsigned int __mbcnt_lo(unsigned int x, unsigned int y) __asm("llvm.amdgcn.mbcnt.lo");
|
||||
|
||||
extern
|
||||
__device__
|
||||
__attribute__((const))
|
||||
unsigned int __mbcnt_hi(unsigned int x, unsigned int y) __asm("llvm.amdgcn.mbcnt.hi");
|
||||
|
||||
__device__
|
||||
int __llvm_amdgcn_ds_bpermute(int index, int src) __asm("llvm.amdgcn.ds.bpermute");
|
||||
|
||||
__device__
|
||||
int __llvm_amdgcn_ds_permute(int index, int src) __asm("llvm.amdgcn.ds.permute");
|
||||
|
||||
__device__
|
||||
int __llvm_amdgcn_ds_swizzle(int index, int pattern) __asm("llvm.amdgcn.ds.swizzle");
|
||||
|
||||
__device__
|
||||
int __llvm_amdgcn_move_dpp(int src, int dpp_ctrl, int row_mask, int bank_mask,
|
||||
bool bound_ctrl) __asm("llvm.amdgcn.mov.dpp.i32");
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1411,12 +1411,12 @@ float func(float x, int y) \
|
||||
__DEF_FLOAT_FUN2I(scalbn)
|
||||
|
||||
template<class T>
|
||||
__DEVICE__ inline static T min(T arg1, T arg2) {
|
||||
__DEVICE__ inline T min(T arg1, T arg2) {
|
||||
return (arg1 < arg2) ? arg1 : arg2;
|
||||
}
|
||||
|
||||
template<class T>
|
||||
__DEVICE__ inline static T max(T arg1, T arg2) {
|
||||
__DEVICE__ inline T max(T arg1, T arg2) {
|
||||
return (arg1 > arg2) ? arg1 : arg2;
|
||||
}
|
||||
|
||||
|
||||
@@ -1511,6 +1511,10 @@ inline static hipError_t hipProfilerStart() { return hipCUDAErrorTohipError(cuda
|
||||
|
||||
inline static hipError_t hipProfilerStop() { return hipCUDAErrorTohipError(cudaProfilerStop()); }
|
||||
|
||||
inline static hipError_t hipGetDeviceFlags(unsigned int* flags) {
|
||||
return hipCUDAErrorTohipError(cudaGetDeviceFlags(flags));
|
||||
}
|
||||
|
||||
inline static hipError_t hipSetDeviceFlags(unsigned int flags) {
|
||||
return hipCUDAErrorTohipError(cudaSetDeviceFlags(flags));
|
||||
}
|
||||
|
||||
@@ -3,6 +3,10 @@ project(hip_base)
|
||||
|
||||
install(DIRECTORY @hip_SOURCE_DIR@/bin DESTINATION . USE_SOURCE_PERMISSIONS)
|
||||
install(DIRECTORY @hip_SOURCE_DIR@/include DESTINATION .)
|
||||
install(FILES @PROJECT_BINARY_DIR@/include/hip/hcc_detail/hip_prof_str.h
|
||||
DESTINATION include/hip/hcc_detail)
|
||||
install(FILES @PROJECT_BINARY_DIR@/include/hip/hip_version.h
|
||||
DESTINATION include/hip)
|
||||
install(FILES @PROJECT_BINARY_DIR@/.hipVersion DESTINATION bin)
|
||||
install(PROGRAMS @PROJECT_BINARY_DIR@/lpl DESTINATION bin)
|
||||
install(PROGRAMS @PROJECT_BINARY_DIR@/ca DESTINATION bin)
|
||||
|
||||
@@ -14,7 +14,7 @@ HIPDIR=$ROCMDIR/hip
|
||||
HIPLIBDIR=$ROCMDIR/hip/lib
|
||||
|
||||
# Soft-link to library files
|
||||
HIPLIBFILES=$(ls -A $HIPLIBDIR | grep -v [-/$])
|
||||
HIPLIBFILES=$(ls -A $HIPLIBDIR | grep -v "cmake\|[-/$]")
|
||||
mkdir -p $ROCMLIBDIR
|
||||
mkdir -p $ROCMLIBDIR/cmake
|
||||
pushd $ROCMLIBDIR
|
||||
|
||||
@@ -14,7 +14,7 @@ HIPDIR=$ROCMDIR/hip
|
||||
HIPLIBDIR=$ROCMDIR/hip/lib
|
||||
|
||||
# Soft-link to library files
|
||||
HIPLIBFILES=$(ls -A $HIPLIBDIR | grep -v [-/$])
|
||||
HIPLIBFILES=$(ls -A $HIPLIBDIR | grep -v "cmake\|[-/$]")
|
||||
mkdir -p $ROCMLIBDIR
|
||||
mkdir -p $ROCMLIBDIR/cmake
|
||||
pushd $ROCMLIBDIR
|
||||
|
||||
@@ -27,30 +27,44 @@ endif()
|
||||
|
||||
set(USE_PROF_API "1")
|
||||
|
||||
if(NOT DEFINED LIBROCclr_STATIC_DIR)
|
||||
find_path(LIBROCclr_STATIC_DIR
|
||||
NAMES libamdrocclr_static.a
|
||||
PATHS /opt/rocm/rocclr
|
||||
PATH_SUFFIXES lib
|
||||
)
|
||||
# FIXME: Make this required and remove the legacy handling below
|
||||
set(save_rocclr_dir ${ROCclr_DIR})
|
||||
set(save_rocclr_static_dir ${LIBROCclr_STATIC_DIR})
|
||||
|
||||
find_package(ROCclr CONFIG
|
||||
PATHS
|
||||
/opt/rocm
|
||||
/opt/rocm/rocclr)
|
||||
|
||||
if (NOT ROCclr_FOUND)
|
||||
if(NOT DEFINED LIBROCclr_STATIC_DIR)
|
||||
find_path(LIBROCclr_STATIC_DIR
|
||||
NAMES libamdrocclr_static.a
|
||||
PATHS /opt/rocm/rocclr
|
||||
PATH_SUFFIXES lib)
|
||||
else()
|
||||
set(LIBROCclr_STATIC_DIR ${save_rocclr_static_dir})
|
||||
endif()
|
||||
|
||||
if(NOT DEFINED ROCclr_DIR)
|
||||
find_path(ROCclr_DIR
|
||||
NAMES top.hpp
|
||||
PATH_SUFFIXES include
|
||||
PATHS /opt/rocm/rocclr)
|
||||
else()
|
||||
set(ROCclr_DIR ${save_rocclr_dir})
|
||||
endif()
|
||||
message("Found Static rocclr lib:${LIBROCclr_STATIC_DIR} and rocclr includes: ${ROCclr_DIR}")
|
||||
include(${LIBROCclr_STATIC_DIR}/amdrocclr_staticTargets.cmake)
|
||||
endif()
|
||||
|
||||
if(NOT DEFINED ROCclr_DIR)
|
||||
find_path(ROCclr_DIR
|
||||
NAMES top.hpp
|
||||
PATH_SUFFIXES include
|
||||
PATHS /opt/rocm/rocclr
|
||||
)
|
||||
endif()
|
||||
message("Found Static rocclr lib:${LIBROCclr_STATIC_DIR} and rocclr includes: ${ROCclr_DIR}")
|
||||
set(PROF_API_HEADER_PATH ${ROCclr_DIR}/platform)
|
||||
#############################
|
||||
# Profiling API support
|
||||
#############################
|
||||
# Generate profiling API macros/structures header
|
||||
# FIXME: This should not be writing to the source directory
|
||||
set(PROF_API_STR "${PROJECT_BINARY_DIR}/include/hip/hcc_detail/hip_prof_str.h")
|
||||
set(PROF_API_HDR "${CMAKE_CURRENT_SOURCE_DIR}/../include/hip/hcc_detail/hip_runtime_api.h")
|
||||
set(PROF_API_HDR "${PROJECT_SOURCE_DIR}/include/hip/hcc_detail/hip_runtime_api.h")
|
||||
set(PROF_API_SRC "${CMAKE_CURRENT_SOURCE_DIR}")
|
||||
set(PROF_API_GEN "${CMAKE_CURRENT_SOURCE_DIR}/hip_prof_gen.py")
|
||||
set(PROF_API_LOG "${PROJECT_BINARY_DIR}/hip_prof_gen.log.txt")
|
||||
@@ -66,25 +80,6 @@ add_custom_target(gen-prof-api-str-header ALL
|
||||
SOURCES ${PROF_API_HDR})
|
||||
|
||||
# Enable profiling API
|
||||
if(USE_PROF_API EQUAL 1)
|
||||
find_path(PROF_API_HEADER_DIR prof_protocol.h
|
||||
HINTS
|
||||
${PROF_API_HEADER_PATH}
|
||||
PATHS
|
||||
/opt/rocm/roctracer
|
||||
PATH_SUFFIXES
|
||||
include/ext
|
||||
)
|
||||
if(NOT PROF_API_HEADER_DIR)
|
||||
MESSAGE(WARNING "Profiling API header not found. Disabling roctracer integration. Use -DPROF_API_HEADER_PATH=<path to prof_protocol.h header>")
|
||||
else()
|
||||
add_definitions(-DUSE_PROF_API=1)
|
||||
include_directories(${PROF_API_HEADER_DIR})
|
||||
MESSAGE(STATUS "Profiling API: ${PROF_API_HEADER_DIR}")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
||||
if(NOT DEFINED ROCclr_DIR OR NOT DEFINED LIBOCL_STATIC_DIR OR NOT DEFINED LIBROCclr_STATIC_DIR )
|
||||
# message(FATAL_ERROR "define ROCclr_DIR, LIBOCL_STATIC_DIR\n")
|
||||
|
||||
@@ -92,35 +87,16 @@ endif()
|
||||
list ( APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules" )
|
||||
set(CMAKE_MODULE_PATH${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake" "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules")
|
||||
|
||||
include_directories(${ROCR_INCLUDES})
|
||||
if (DEFINED LLVM_INCLUDES AND NOT ${LLVM_INCLUDES} STREQUAL "")
|
||||
message(STATUS "LLVM includes found ${LLVM_INCLUDES}")
|
||||
include_directories(${LLVM_INCLUDES})
|
||||
endif() # if (DEFINED LLVM_INCLUDES AND NOT ${LLVM_INCLUDES} STREQUAL "")
|
||||
|
||||
include_directories(${CMAKE_SOURCE_DIR})
|
||||
include_directories(${CMAKE_SOURCE_DIR}/include)
|
||||
include_directories(${PROJECT_BINARY_DIR}/include)
|
||||
include_directories(${CMAKE_SOURCE_DIR}/elfio)
|
||||
include_directories(${CMAKE_SOURCE_DIR}/amdocl)
|
||||
include_directories(${CMAKE_SOURCE_DIR}/include/hip/hcc_detail/elfio)
|
||||
include_directories(${ROCclr_DIR})
|
||||
include_directories(${ROCclr_DIR}/include)
|
||||
include_directories(${ROCclr_DIR}/compiler/lib)
|
||||
include_directories(${ROCclr_DIR}/compiler/lib/include)
|
||||
include_directories(${ROCclr_DIR}/elf/utils/common)
|
||||
include_directories(${ROCclr_DIR}/elf/utils/libelf)
|
||||
add_definitions(-DUSE_COMGR_LIBRARY -DCOMGR_DYN_DLL)
|
||||
find_package(amd_comgr REQUIRED CONFIG
|
||||
PATHS
|
||||
/opt/rocm/
|
||||
PATH_SUFFIXES
|
||||
cmake/amd_comgr
|
||||
lib/cmake/amd_comgr
|
||||
)
|
||||
MESSAGE(STATUS "Code Object Manager found at ${amd_comgr_DIR}.")
|
||||
|
||||
include_directories("$<TARGET_PROPERTY:amd_comgr,INTERFACE_INCLUDE_DIRECTORIES>")
|
||||
find_package(amd_comgr REQUIRED CONFIG
|
||||
PATHS
|
||||
/opt/rocm/
|
||||
PATH_SUFFIXES
|
||||
cmake/amd_comgr
|
||||
lib/cmake/amd_comgr)
|
||||
|
||||
message(STATUS "Code Object Manager found at ${amd_comgr_DIR}.")
|
||||
|
||||
add_definitions(-DBSD_LIBELF)
|
||||
|
||||
@@ -147,6 +123,55 @@ add_library(hip64 OBJECT
|
||||
)
|
||||
set_target_properties(hip64 PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
target_include_directories(hip64
|
||||
PUBLIC
|
||||
${PROJECT_SOURCE_DIR}/include
|
||||
${PROJECT_BINARY_DIR}/include
|
||||
PRIVATE
|
||||
${CMAKE_SOURCE_DIR}/elfio
|
||||
${PROJECT_SOURCE_DIR}
|
||||
${PROJECT_SOURCE_DIR}/amdocl
|
||||
${PROJECT_SOURCE_DIR}/include/hip/hcc_detail/elfio
|
||||
${ROCclr_DIR}
|
||||
${ROCclr_DIR}/include
|
||||
${ROCclr_DIR}/compiler/lib
|
||||
${ROCclr_DIR}/compiler/lib/include
|
||||
${ROCclr_DIR}/elf/utils/common
|
||||
${ROCclr_DIR}/elf/utils/libelf
|
||||
${ROCR_INCLUDES}
|
||||
$<TARGET_PROPERTY:amd_comgr,INTERFACE_INCLUDE_DIRECTORIES>)
|
||||
target_compile_definitions(hip64
|
||||
PRIVATE
|
||||
$<TARGET_PROPERTY:amd_comgr,INTERFACE_COMPILE_DEFINITIONS>)
|
||||
|
||||
if(ROCclr_FOUND)
|
||||
target_include_directories(hip64
|
||||
PRIVATE
|
||||
$<TARGET_PROPERTY:amdrocclr_static,INTERFACE_INCLUDE_DIRECTORIES>)
|
||||
target_compile_definitions(hip64
|
||||
PRIVATE
|
||||
$<TARGET_PROPERTY:amdrocclr_static,INTERFACE_COMPILE_DEFINITIONS>)
|
||||
endif()
|
||||
|
||||
# Enable profiling API
|
||||
if(USE_PROF_API EQUAL 1)
|
||||
find_path(PROF_API_HEADER_DIR prof_protocol.h
|
||||
HINTS
|
||||
${PROF_API_HEADER_PATH}
|
||||
PATHS
|
||||
/opt/rocm/roctracer
|
||||
PATH_SUFFIXES
|
||||
include/ext)
|
||||
|
||||
if(NOT PROF_API_HEADER_DIR)
|
||||
message(WARNING "Profiling API header not found. Disabling roctracer integration. Use -DPROF_API_HEADER_PATH=<path to prof_protocol.h header>")
|
||||
else()
|
||||
target_compile_definitions(hip64 PUBLIC USE_PROF_API=1)
|
||||
target_include_directories(hip64 PUBLIC ${PROF_API_HEADER_DIR})
|
||||
message(STATUS "Profiling API: ${PROF_API_HEADER_DIR}")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set_target_properties(
|
||||
hip64 PROPERTIES
|
||||
CXX_STANDARD 14
|
||||
@@ -157,7 +182,6 @@ add_dependencies(hip64 gen-prof-api-str-header)
|
||||
|
||||
set(THREADS_PREFER_PTHREAD_FLAG ON)
|
||||
find_package(Threads REQUIRED)
|
||||
include(${LIBROCclr_STATIC_DIR}/amdrocclr_staticTargets.cmake)
|
||||
|
||||
add_library(amdhip64 SHARED
|
||||
$<TARGET_OBJECTS:hip64>
|
||||
|
||||
@@ -188,13 +188,13 @@ hipError_t hipGetDeviceProperties ( hipDeviceProp_t* props, hipDevice_t device )
|
||||
deviceProps.arch.hasGlobalFloatAtomicExch = 1;
|
||||
deviceProps.arch.hasSharedInt32Atomics = 1;
|
||||
deviceProps.arch.hasSharedFloatAtomicExch = 1;
|
||||
deviceProps.arch.hasFloatAtomicAdd = 0;
|
||||
deviceProps.arch.hasFloatAtomicAdd = 1;
|
||||
deviceProps.arch.hasGlobalInt64Atomics = 1;
|
||||
deviceProps.arch.hasSharedInt64Atomics = 1;
|
||||
deviceProps.arch.hasDoubles = 1;
|
||||
deviceProps.arch.hasWarpVote = 0;
|
||||
deviceProps.arch.hasWarpBallot = 0;
|
||||
deviceProps.arch.hasWarpShuffle = 0;
|
||||
deviceProps.arch.hasWarpVote = 1;
|
||||
deviceProps.arch.hasWarpBallot = 1;
|
||||
deviceProps.arch.hasWarpShuffle = 1;
|
||||
deviceProps.arch.hasFunnelShift = 0;
|
||||
deviceProps.arch.hasThreadFenceSystem = 1;
|
||||
deviceProps.arch.hasSyncThreadsExt = 0;
|
||||
|
||||
@@ -471,7 +471,12 @@ hipError_t hipGetDeviceCount ( int* count ) {
|
||||
}
|
||||
|
||||
hipError_t hipGetDeviceFlags ( unsigned int* flags ) {
|
||||
HIP_RETURN(hipErrorNotSupported);
|
||||
HIP_INIT_API(hipGetDeviceFlags, flags);
|
||||
if (flags == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
*flags = hip::getCurrentDevice()->getFlags();
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipIpcGetEventHandle ( hipIpcEventHandle_t* handle, hipEvent_t event ) {
|
||||
@@ -531,7 +536,8 @@ hipError_t hipSetDeviceFlags ( unsigned int flags ) {
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
hip::getCurrentDevice()->setFlags(flags & hipDeviceScheduleMask);
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
|
||||
@@ -91,7 +91,7 @@ hipError_t Event::elapsedTime(Event& eStop, float& ms) {
|
||||
}
|
||||
|
||||
ms = static_cast<float>(static_cast<int64_t>(eStop.event_->profilingInfo().end_ -
|
||||
event_->profilingInfo().start_))/1000000.f;
|
||||
event_->profilingInfo().end_))/1000000.f;
|
||||
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
@@ -149,6 +149,7 @@ hipPointerGetAttributes
|
||||
hipProfilerStart
|
||||
hipProfilerStop
|
||||
hipRuntimeGetVersion
|
||||
hipGetDeviceFlags
|
||||
hipSetDevice
|
||||
hipSetDeviceFlags
|
||||
hipStreamAddCallback
|
||||
|
||||
@@ -149,6 +149,7 @@ global:
|
||||
hipProfilerStart;
|
||||
hipProfilerStop;
|
||||
hipRuntimeGetVersion;
|
||||
hipGetDeviceFlags;
|
||||
hipSetDevice;
|
||||
hipSetDeviceFlags;
|
||||
hipStreamAddCallback;
|
||||
|
||||
@@ -119,12 +119,14 @@ namespace hip {
|
||||
int deviceId_;
|
||||
/// ROCclr host queue for default streams
|
||||
Stream null_stream_;
|
||||
//Maintain list of user enabled peers
|
||||
/// Store device flags
|
||||
unsigned int flags_;
|
||||
/// Maintain list of user enabled peers
|
||||
std::list<int> userEnabledPeers;
|
||||
|
||||
public:
|
||||
Device(amd::Context* ctx, int devId):
|
||||
context_(ctx), deviceId_(devId), null_stream_(this, amd::CommandQueue::Priority::Normal, 0, true)
|
||||
context_(ctx), deviceId_(devId), null_stream_(this, amd::CommandQueue::Priority::Normal, 0, true), flags_(hipDeviceScheduleSpin)
|
||||
{ assert(ctx != nullptr); }
|
||||
~Device() {}
|
||||
|
||||
@@ -152,6 +154,8 @@ namespace hip {
|
||||
return hipErrorPeerAccessNotEnabled;
|
||||
}
|
||||
}
|
||||
unsigned int getFlags() const { return flags_; }
|
||||
void setFlags(unsigned int flags) { flags_ = flags; }
|
||||
amd::HostQueue* NullStream(bool skip_alloc = false);
|
||||
};
|
||||
|
||||
@@ -208,6 +212,7 @@ public:
|
||||
void init();
|
||||
std::vector<std::pair<hipModule_t, bool>>* addFatBinary(const void*data)
|
||||
{
|
||||
amd::ScopedLock lock(lock_);
|
||||
if (initialized_) {
|
||||
digestFatBinary(data, modules_[data]);
|
||||
}
|
||||
@@ -215,6 +220,7 @@ public:
|
||||
}
|
||||
void removeFatBinary(std::vector<std::pair<hipModule_t, bool>>* module)
|
||||
{
|
||||
amd::ScopedLock lock(lock_);
|
||||
for (auto& mod : modules_) {
|
||||
if (&mod.second == module) {
|
||||
modules_.erase(&mod);
|
||||
|
||||
@@ -129,7 +129,10 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin
|
||||
amd::HostQueue* pQueue = &queue;
|
||||
if (queueDevice != dstMemory->getContext().devices()[0]) {
|
||||
pQueue = hip::getNullStream(dstMemory->getContext());
|
||||
waitList.push_back(queue.getLastQueuedCommand(true));
|
||||
amd::Command* cmd = queue.getLastQueuedCommand(true);
|
||||
if (cmd != nullptr) {
|
||||
waitList.push_back(cmd);
|
||||
}
|
||||
}
|
||||
command = new amd::WriteMemoryCommand(*pQueue, CL_COMMAND_WRITE_BUFFER, waitList,
|
||||
*dstMemory->asBuffer(), dOffset, sizeBytes, src);
|
||||
@@ -138,7 +141,10 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin
|
||||
amd::HostQueue* pQueue = &queue;
|
||||
if (queueDevice != srcMemory->getContext().devices()[0]) {
|
||||
pQueue = hip::getNullStream(srcMemory->getContext());
|
||||
waitList.push_back(queue.getLastQueuedCommand(true));
|
||||
amd::Command* cmd = queue.getLastQueuedCommand(true);
|
||||
if (cmd != nullptr) {
|
||||
waitList.push_back(cmd);
|
||||
}
|
||||
}
|
||||
command = new amd::ReadMemoryCommand(*pQueue, CL_COMMAND_READ_BUFFER, waitList,
|
||||
*srcMemory->asBuffer(), sOffset, sizeBytes, dst);
|
||||
|
||||
@@ -314,21 +314,19 @@ hipError_t hipFuncGetAttribute(int* value, hipFunction_attribute attrib, hipFunc
|
||||
|
||||
switch(attrib) {
|
||||
case HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES:
|
||||
*value = static_cast<int>(wrkGrpInfo->localMemSize_
|
||||
- wrkGrpInfo->privateMemSize_);
|
||||
*value = static_cast<int>(wrkGrpInfo->localMemSize_);
|
||||
break;
|
||||
case HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK:
|
||||
*value = static_cast<int>(wrkGrpInfo->wavefrontPerSIMD_
|
||||
* wrkGrpInfo->wavefrontSize_);
|
||||
*value = static_cast<int>(wrkGrpInfo->size_);
|
||||
break;
|
||||
case HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES:
|
||||
*value = 0;
|
||||
break;
|
||||
case HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES:
|
||||
*value = static_cast<int>(wrkGrpInfo->localMemSize_);
|
||||
*value = static_cast<int>(wrkGrpInfo->privateMemSize_);
|
||||
break;
|
||||
case HIP_FUNC_ATTRIBUTE_NUM_REGS:
|
||||
*value = static_cast<int>(wrkGrpInfo->availableGPRs_);
|
||||
*value = static_cast<int>(wrkGrpInfo->usedVGPRs_);
|
||||
break;
|
||||
case HIP_FUNC_ATTRIBUTE_PTX_VERSION:
|
||||
*value = 30; // Defaults to 3.0 as HCC
|
||||
@@ -340,7 +338,7 @@ hipError_t hipFuncGetAttribute(int* value, hipFunction_attribute attrib, hipFunc
|
||||
*value = 0;
|
||||
break;
|
||||
case HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES:
|
||||
*value = static_cast<int>(wrkGrpInfo->availableLDSSize_);
|
||||
*value = static_cast<int>(wrkGrpInfo->availableLDSSize_ - wrkGrpInfo->localMemSize_);
|
||||
break;
|
||||
case HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT:
|
||||
*value = 0;
|
||||
|
||||
@@ -122,10 +122,12 @@ hipError_t __hipExtractCodeObjectFromFatBinary(const void* data,
|
||||
num_code_objs++;
|
||||
}
|
||||
}
|
||||
if (num_code_objs == devices.size())
|
||||
if (num_code_objs == devices.size()) {
|
||||
return hipSuccess;
|
||||
else
|
||||
} else {
|
||||
DevLogError("hipErrorNoBinaryForGpu: Coudn't find binary for current devices!");
|
||||
return hipErrorNoBinaryForGpu;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" std::vector<std::pair<hipModule_t, bool>>* __hipRegisterFatBinary(const void* data)
|
||||
@@ -189,6 +191,13 @@ void PlatformState::init()
|
||||
for (auto& it : vars_) {
|
||||
it.second.rvars.resize(g_devices.size());
|
||||
}
|
||||
if (!HIP_ENABLE_LAZY_KERNEL_LOADING) {
|
||||
for (size_t i = 0; i < g_devices.size(); ++i) {
|
||||
for (auto& it: functions_) {
|
||||
getFunc(it.first, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool PlatformState::unregisterFunc(hipModule_t hmod) {
|
||||
@@ -227,6 +236,11 @@ std::vector< std::pair<hipModule_t, bool> >* PlatformState::unregisterVar(hipMod
|
||||
= reinterpret_cast<texture<float, hipTextureType1D, hipReadModeElementType> *>(dvar.shadowVptr);
|
||||
delete tex_hptr;
|
||||
}
|
||||
for (size_t dev = 0; dev < g_devices.size(); ++dev) {
|
||||
if (dvar.rvars[dev].getdeviceptr()) {
|
||||
amd::MemObjMap::RemoveMemObj(dvar.rvars[dev].getdeviceptr());
|
||||
}
|
||||
}
|
||||
vars_.erase(it++);
|
||||
} else {
|
||||
++it;
|
||||
@@ -309,11 +323,20 @@ bool ihipGetFuncAttributes(const char* func_name, amd::Program* program, hipFunc
|
||||
return false;
|
||||
}
|
||||
|
||||
const device::Kernel::WorkGroupInfo* wginfo = it->second->workGroupInfo();
|
||||
func_attr->localSizeBytes = wginfo->localMemSize_;
|
||||
func_attr->sharedSizeBytes = wginfo->size_;
|
||||
func_attr->maxThreadsPerBlock = wginfo->wavefrontSize_;
|
||||
func_attr->numRegs = wginfo->usedVGPRs_;
|
||||
const device::Kernel* kernel = it->second;
|
||||
const device::Kernel::WorkGroupInfo* wginfo = kernel->workGroupInfo();
|
||||
func_attr->sharedSizeBytes = static_cast<int>(wginfo->localMemSize_);
|
||||
func_attr->binaryVersion = static_cast<int>(kernel->signature().version());
|
||||
func_attr->cacheModeCA = 0;
|
||||
func_attr->constSizeBytes = 0;
|
||||
func_attr->localSizeBytes = wginfo->privateMemSize_;
|
||||
func_attr->maxDynamicSharedSizeBytes = static_cast<int>(wginfo->availableLDSSize_
|
||||
- wginfo->localMemSize_);
|
||||
|
||||
func_attr->maxThreadsPerBlock = static_cast<int>(wginfo->size_);
|
||||
func_attr->numRegs = static_cast<int>(wginfo->usedVGPRs_);
|
||||
func_attr->preferredShmemCarveout = 0;
|
||||
func_attr->ptxVersion = 30;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -23,7 +23,7 @@
|
||||
#include "hip_event.hpp"
|
||||
#include "thread/monitor.hpp"
|
||||
|
||||
static amd::Monitor streamSetLock("Guards global stream set");
|
||||
static amd::Monitor streamSetLock{"Guards global stream set"};
|
||||
static std::unordered_set<hip::Stream*> streamSet;
|
||||
|
||||
// Internal structure for stream callback handler
|
||||
@@ -83,11 +83,11 @@ amd::HostQueue* Stream::asHostQueue(bool skip_alloc) {
|
||||
// ================================================================================================
|
||||
void Stream::Destroy() {
|
||||
if (queue_ != nullptr) {
|
||||
queue_->release();
|
||||
queue_ = nullptr;
|
||||
|
||||
amd::ScopedLock lock(streamSetLock);
|
||||
streamSet.erase(this);
|
||||
|
||||
queue_->release();
|
||||
queue_ = nullptr;
|
||||
}
|
||||
delete this;
|
||||
}
|
||||
|
||||
@@ -38,11 +38,11 @@ THE SOFTWARE.
|
||||
|
||||
__global__ void HIP_kernel(unsigned int* mbcnt_lo, unsigned int* mbcnt_hi, unsigned int* lane_id) {
|
||||
int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
mbcnt_lo[x] = __mbcnt_lo(0xFFFFFFFF, 0);
|
||||
mbcnt_hi[x] = __mbcnt_hi(0xFFFFFFFF, 0);
|
||||
mbcnt_lo[x] = __builtin_amdgcn_mbcnt_lo(0xFFFFFFFF, 0);
|
||||
mbcnt_hi[x] = __builtin_amdgcn_mbcnt_hi(0xFFFFFFFF, 0);
|
||||
lane_id[x] = __lane_id();
|
||||
}
|
||||
|
||||
|
||||
using namespace std;
|
||||
|
||||
int main() {
|
||||
|
||||
@@ -37,18 +37,13 @@ THE SOFTWARE.
|
||||
|
||||
using namespace std::chrono;
|
||||
|
||||
const static uint NumOfLoopIterrations = 16 * 1024;
|
||||
const static uint BufferSizeInDwords = 28672 * NumOfLoopIterrations;
|
||||
const static uint BufferSizeInDwords = 256 * 1024 * 1024;
|
||||
const static uint numQueues = 4;
|
||||
const static uint numIter = 100;
|
||||
constexpr uint NumKernelArgs = 4;
|
||||
constexpr uint MaxGPUs = 8;
|
||||
|
||||
#include <stdio.h>
|
||||
/*
|
||||
namespace cg = cooperative_groups;
|
||||
using namespace cooperative_groups;
|
||||
*/
|
||||
|
||||
__global__ void test_gws(uint* buf, uint bufSize, long* tmpBuf, long* result)
|
||||
{
|
||||
@@ -126,11 +121,13 @@ int main() {
|
||||
size_t SIZE = copySizeInDwords * sizeof(uint);
|
||||
|
||||
HIPCHECK(hipMalloc((void**)&dA[i], SIZE));
|
||||
HIPCHECK(hipMalloc((void**)&dB[i], 64 * deviceProp[i].multiProcessorCount * sizeof(long)));
|
||||
if (i == 0) {
|
||||
HIPCHECK(hipHostMalloc((void**)&dC, (nGpu + 1) * sizeof(long), hipHostMallocCoherent));
|
||||
}
|
||||
HIPCHECK(hipMemcpy(dA[i], &init[i * copySizeInDwords] , SIZE, hipMemcpyHostToDevice));
|
||||
HIPCHECK(hipStreamCreate(&stream[i]));
|
||||
hipDeviceSynchronize();
|
||||
}
|
||||
|
||||
dim3 dimBlock;
|
||||
@@ -146,22 +143,22 @@ int main() {
|
||||
uint workgroups[3] = {64, 128, 256};
|
||||
|
||||
hipLaunchParams* launchParamsList = new hipLaunchParams[nGpu];
|
||||
|
||||
system_clock::time_point start = system_clock::now();
|
||||
|
||||
std::time_t end_time;
|
||||
double time = 0;
|
||||
for (uint set = 0; set < 3; ++set) {
|
||||
void* args[MaxGPUs * NumKernelArgs];
|
||||
std::cout << "---------- Test#" << set << "---------------\n";
|
||||
std::cout << "---------- Test#" << set << ", size: "<< BufferSizeInDwords <<
|
||||
" dwords ---------------\n";
|
||||
for (int i = 0; i < nGpu; i++) {
|
||||
HIPCHECK(hipSetDevice(i));
|
||||
dimBlock.x = workgroups[set];
|
||||
HIPCHECK(hipOccupancyMaxActiveBlocksPerMultiprocessor(&numBlocks,
|
||||
test_gws, dimBlock.x * dimBlock.y * dimBlock.z, dimBlock.x * sizeof(long)));
|
||||
|
||||
std::cout << "GPU(" << i << ") Block size: " << dimBlock.x << " Num blocks per CU: " << numBlocks << "\n";
|
||||
|
||||
std::cout << "GPU(" << i << ") Block size: " << dimBlock.x <<
|
||||
" Num blocks per CU: " << numBlocks << "\n";
|
||||
|
||||
dimGrid.x = deviceProp[i].multiProcessorCount * std::min(numBlocks, 32);
|
||||
HIPCHECK(hipMalloc((void**)&dB[i], dimGrid.x * sizeof(long)));
|
||||
|
||||
args[i * NumKernelArgs] = (void*)&dA[i];
|
||||
args[i * NumKernelArgs + 1] = (void*)©SizeInDwords;
|
||||
@@ -175,32 +172,34 @@ int main() {
|
||||
launchParamsList[i].stream = stream[i];
|
||||
launchParamsList[i].args = &args[i * NumKernelArgs];
|
||||
}
|
||||
|
||||
hipLaunchCooperativeKernelMultiDevice(launchParamsList, nGpu, 0);
|
||||
|
||||
if (*dC != (((long)(BufferSizeInDwords) * (BufferSizeInDwords - 1)) / 2)) {
|
||||
std::cout << "Data validation failed for grid size = " << dimGrid.x << " and block size = " << dimBlock.x << "\n";
|
||||
system_clock::time_point start = system_clock::now();
|
||||
hipLaunchCooperativeKernelMultiDevice(launchParamsList, nGpu, 0);
|
||||
system_clock::time_point end = system_clock::now();
|
||||
std::chrono::duration<double> elapsed_seconds = end - start;
|
||||
end_time = std::chrono::system_clock::to_time_t(end);
|
||||
|
||||
time += elapsed_seconds.count();
|
||||
|
||||
size_t processedDwords = copySizeInDwords * nGpu;
|
||||
if (*dC != (((long)(processedDwords) * (processedDwords - 1)) / 2)) {
|
||||
std::cout << "Data validation failed ("<< *dC << " != " <<
|
||||
(((long)(BufferSizeInDwords) * (BufferSizeInDwords - 1)) / 2) <<
|
||||
") for grid size = " << dimGrid.x << " and block size = " << dimBlock.x << "\n";
|
||||
std::cout << "Test failed! \n";
|
||||
}
|
||||
for (int i = 0; i < nGpu; i++) {
|
||||
hipFree(dB[i]);
|
||||
}
|
||||
}
|
||||
system_clock::time_point end = system_clock::now();
|
||||
|
||||
delete [] launchParamsList;
|
||||
|
||||
std::chrono::duration<double> elapsed_seconds = end - start;
|
||||
|
||||
std::time_t end_time = std::chrono::system_clock::to_time_t(end);
|
||||
|
||||
std::cout << "finished computation at " << std::ctime(&end_time) <<
|
||||
"elapsed time: " << elapsed_seconds.count() << "s\n";
|
||||
"elapsed time: " << time << "s\n";
|
||||
|
||||
hipSetDevice(0);
|
||||
hipFree(dC);
|
||||
for (int i = 0; i < nGpu; i++) {
|
||||
hipFree(dA[i]);
|
||||
hipFree(dB[i]);
|
||||
HIPCHECK(hipStreamDestroy(stream[i]));
|
||||
}
|
||||
delete [] init;
|
||||
|
||||
Reference in New Issue
Block a user