From cbe90236399ad6c0c604058419a50976e7254284 Mon Sep 17 00:00:00 2001 From: Michael LIAO Date: Wed, 29 Apr 2020 12:20:42 -0400 Subject: [PATCH 01/33] [vdi] Skip null texture object in `hipDestroyTextureObject`. - To match both CUDA and HCC runtime behavior. Change-Id: I072b006dd554e17f8341f391d33bf6224a125a7e --- vdi/hip_texture.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vdi/hip_texture.cpp b/vdi/hip_texture.cpp index 94026c8e33..acaaf8c165 100755 --- a/vdi/hip_texture.cpp +++ b/vdi/hip_texture.cpp @@ -312,7 +312,7 @@ hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, hipError_t ihipDestroyTextureObject(hipTextureObject_t texObject) { if (texObject == nullptr) { - return hipErrorInvalidValue; + return hipSuccess; } const hipResourceType type = texObject->resDesc.resType; From 4c3f8221e99225cab0aaf289ecb27ee3859964aa Mon Sep 17 00:00:00 2001 From: Payam Date: Wed, 29 Apr 2020 15:32:13 -0400 Subject: [PATCH 02/33] renaming vdi to rocclr, part 1 Change-Id: Idec9be2b6174217abcebaa1776e59168670740b1 --- CMakeLists.txt | 14 +++++++------- .../{hip-vdi.postinst => hip-rocclr.postinst} | 0 packaging/{hip-vdi.prerm => hip-rocclr.prerm} | 0 packaging/{hip-vdi.txt => hip-rocclr.txt} | 0 vdi/CMakeLists.txt | 8 ++++---- 5 files changed, 11 insertions(+), 11 deletions(-) rename packaging/{hip-vdi.postinst => hip-rocclr.postinst} (100%) rename packaging/{hip-vdi.prerm => hip-rocclr.prerm} (100%) rename packaging/{hip-vdi.txt => hip-rocclr.txt} (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index b8e2c6947b..7fbb5f4739 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,8 +1,8 @@ cmake_minimum_required(VERSION 3.4.3) project(hip) -# sample command for hip-vdi, you'll need to have vdi installed +# sample command for hip-rocclr, you'll need to have rocclr installed # cmake -DHIP_COMPILER=clang -DHIP_PLATFORM=vdi .. -# cmake -DHIP_COMPILER=clang -DHIP_PLATFORM=vdi -DVDI_DIR=/extra/lmoriche/hip-vdi/vdi -DOPENCL_DIR=/extra/lmoriche/clients/lmoriche_opencl_dev2/drivers/opencl/api/opencl -DLIBVDI_STATIC_DIR=/extra/lmoriche/hip-vdi/build/vdi .. +# cmake -DHIP_COMPILER=clang -DHIP_PLATFORM=vdi -DVDI_DIR=/extra/lmoriche/hip-rocclr/rocclr -DOPENCL_DIR=/extra/lmoriche/clients/lmoriche_opencl_dev2/drivers/opencl/api/opencl -DLIBVDI_STATIC_DIR=/extra/lmoriche/hip-rocclr/build/rocclr .. set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake") @@ -530,10 +530,10 @@ if(HIP_PLATFORM STREQUAL "hcc") DEPENDS hip_hcc hip_hcc_static hiprtc) else() set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/vdi) - configure_file(packaging/hip-vdi.txt ${BUILD_DIR}/CMakeLists.txt @ONLY) - configure_file(packaging/hip-vdi.postinst ${BUILD_DIR}/postinst @ONLY) - configure_file(packaging/hip-vdi.prerm ${BUILD_DIR}/prerm @ONLY) - add_custom_target(hip_on_vdi COMMAND ${CMAKE_COMMAND} . + configure_file(packaging/hip-rocclr.txt ${BUILD_DIR}/CMakeLists.txt @ONLY) + configure_file(packaging/hip-rocclr.postinst ${BUILD_DIR}/postinst @ONLY) + configure_file(packaging/hip-rocclr.prerm ${BUILD_DIR}/prerm @ONLY) + add_custom_target(hip_on_rocclr COMMAND ${CMAKE_COMMAND} . COMMAND rm -rf *.deb *.rpm *.tar.gz COMMAND make package COMMAND cp *.deb ${PROJECT_BINARY_DIR} @@ -594,7 +594,7 @@ elseif(HIP_PLATFORM STREQUAL "vdi") add_custom_target(package COMMAND bash ${PROJECT_BINARY_DIR}/fixnames WORKING_DIRECTORY ${PROJECT_BINARY_DIR} - DEPENDS pkg_hip_base hip_on_vdi pkg_hip_nvcc pkg_hip_doc pkg_hip_samples) + DEPENDS pkg_hip_base hip_on_rocclr pkg_hip_nvcc pkg_hip_doc pkg_hip_samples) endif() if(POLICY CMP0037) diff --git a/packaging/hip-vdi.postinst b/packaging/hip-rocclr.postinst similarity index 100% rename from packaging/hip-vdi.postinst rename to packaging/hip-rocclr.postinst diff --git a/packaging/hip-vdi.prerm b/packaging/hip-rocclr.prerm similarity index 100% rename from packaging/hip-vdi.prerm rename to packaging/hip-rocclr.prerm diff --git a/packaging/hip-vdi.txt b/packaging/hip-rocclr.txt similarity index 100% rename from packaging/hip-vdi.txt rename to packaging/hip-rocclr.txt diff --git a/vdi/CMakeLists.txt b/vdi/CMakeLists.txt index 5460b2ba6b..5283748724 100644 --- a/vdi/CMakeLists.txt +++ b/vdi/CMakeLists.txt @@ -28,7 +28,7 @@ set(USE_PROF_API "1") if(NOT DEFINED LIBVDI_STATIC_DIR) find_path(LIBVDI_STATIC_DIR NAMES libamdvdi_static.a - PATHS /opt/rocm/vdi + PATHS /opt/rocm/rocclr PATH_SUFFIXES lib ) endif() @@ -37,10 +37,10 @@ if(NOT DEFINED VDI_DIR) find_path(VDI_DIR NAMES top.hpp PATH_SUFFIXES include - PATHS /opt/rocm/vdi + PATHS /opt/rocm/rocclr ) endif() -message("Found Static vdi lib:${LIBVDI_STATIC_DIR} and vdi includes: ${VDI_DIR}") +message("Found Static rocclr lib:${LIBVDI_STATIC_DIR} and rocclr includes: ${VDI_DIR}") set(PROF_API_HEADER_PATH ${VDI_DIR}/platform) ############################# # Profiling API support @@ -161,7 +161,7 @@ set_target_properties( VERSION ${HIP_LIB_VERSION_STRING} SOVERSION ${HIP_LIB_VERSION_MAJOR} ) -# We expect amdhip64_static to contain objects of vdi and hip. But linker +# We expect amdhip64_static to contain objects of rocclr and hip. But linker # let amdhip64_static contain objects of hip only. So we will use a # a custom amdhip64_static_combiner to combine objects of vid and hip into # amdhip64_static. To avoid amdhip64_static contains itself, From 2de1129510e29ed0302dc66b9e7fe55135751e10 Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Wed, 29 Apr 2020 16:51:33 -0400 Subject: [PATCH 03/33] Update hipcc to use HIP_COMPILER variable This will update the hipcc script so that it will use HIP_PLATFORM, HIP_COMPILER, and HIP_RUNTIME variables correctly based on updated hipconfig file. Removing HIP_PLATFORM=clang, only use hcc and nvcc for now. HIP_PLATFORM should eventually switch to amd or nvcc. Change-Id: Ie527d07a1add974cc3aab8e9f61b0518147d7602 --- bin/hipcc | 71 +++++++++++++++++++++++++++++-------------------------- 1 file changed, 38 insertions(+), 33 deletions(-) diff --git a/bin/hipcc b/bin/hipcc index 76d87c0c0b..f86a5f7fa3 100755 --- a/bin/hipcc +++ b/bin/hipcc @@ -20,13 +20,15 @@ use Cwd 'abs_path'; # If HIP_PLATFORM is not set hipcc will attempt auto-detect based on if nvcc is found. # # Other environment variable controls: -# HIP_PATH : Path to HIP directory, default is one dir level above location of this script -# CUDA_PATH : Path to CUDA SDK (default /usr/local/cuda). Used on NVIDIA platforms only. -# HCC_HOME : Path to HCC SDK (defaults to ../../hcc relative to this -# script's abs_path). Used on AMD platforms only. -# HSA_PATH : Path to HSA dir (defaults to ../../hsa relative to abs_path -# of this script). Used on AMD platforms only. -# HIP_VDI_HOME : Path to HIP/VDI directory. Used on AMD platforms only. +# HIP_PATH : Path to HIP directory, default is one dir level above location of this script. +# CUDA_PATH : Path to CUDA SDK (default /usr/local/cuda). Used on NVIDIA platforms only. +# HCC_HOME : Path to HCC SDK (defaults to ../../hcc relative to this +# script's abs_path). Used on AMD platforms only. +# HSA_PATH : Path to HSA dir (defaults to ../../hsa relative to abs_path +# of this script). Used on AMD platforms only. +# HIP_VDI_HOME : Path to HIP/VDI directory. Used on AMD platforms only. +# HIP_CLANG_PATH : Path to HIP-Clang (default to ../../llvm/bin relative to this +# script's abs_path). Used on AMD platforms only. if(scalar @ARGV == 0){ print "No Arguments passed, exiting ...\n"; @@ -120,11 +122,13 @@ sub delete_temp_dirs { } #--- -#HIP_PLATFORM controls whether to use NVCC or HCC for compilation: +#HIP_PLATFORM controls whether to use hcc (AMD) or nvcc as the platform: $HIP_PLATFORM= `$HIP_PATH/bin/hipconfig --platform` // "hcc"; $HIP_VERSION= `$HIP_PATH/bin/hipconfig --version`; -$HIP_COMPILER= $hipConfig{'HIP_COMPILER'}; -$HIP_RUNTIME= $hipConfig{'HIP_RUNTIME'}; +#HIP_COMPILER controls whether to use hcc, clang or nvcc for compilation: +$HIP_COMPILER= `$HIP_PATH/bin/hipconfig --compiler`; +#HIP_RUNTIME controls whether to use HCC, VDI, or NVCC as the runtime: +$HIP_RUNTIME= `$HIP_PATH/bin/hipconfig --runtime`; # If using VDI runtime, need to find HIP_VDI_HOME if (defined $HIP_RUNTIME and $HIP_RUNTIME eq "VDI" and !defined $HIP_VDI_HOME) { @@ -151,8 +155,7 @@ if (defined $HIP_VDI_HOME) { } } -if (defined $HIP_COMPILER and $HIP_COMPILER eq "clang") { - $HIP_PLATFORM = "clang"; +if (defined $HIP_COMPILER and $HIP_PLATFORM eq "hcc" and $HIP_COMPILER eq "clang") { if (!defined $HIP_CLANG_PATH) { $HIP_CLANG_PATH = "$ROCM_PATH/llvm/bin"; } @@ -164,6 +167,8 @@ if (defined $HIP_COMPILER and $HIP_COMPILER eq "clang") { if ($verbose & 0x2) { print ("HIP_PATH=$HIP_PATH\n"); print ("HIP_PLATFORM=$HIP_PLATFORM\n"); + print ("HIP_COMPILER=$HIP_COMPILER\n"); + print ("HIP_RUNTIME=$HIP_RUNTIME\n"); } # set if user explicitly requests -stdlib=libc++. (else we default to libstdc++ for better interop with g++): @@ -171,7 +176,7 @@ $setStdLib = 0; # TODO - set to 0 $default_amdgpu_target = 1; -if ($HIP_PLATFORM eq "clang") { +if ($HIP_PLATFORM eq "hcc" and $HIP_COMPILER eq "clang") { $HIPCC="$HIP_CLANG_PATH/clang++"; # If $HIPCC clang++ is not compiled, use clang instead @@ -228,7 +233,7 @@ if ($HIP_PLATFORM eq "clang") { $HIPCFLAGS .= " -D__HIP_VDI__"; } -} elsif ($HIP_PLATFORM eq "hcc") { +} elsif ($HIP_PLATFORM eq "hcc" and $HIP_COMPILER eq "hcc") { $HIP_INCLUDE_PATH = "$HIP_PATH/include"; if (! defined $HIP_LIB_PATH) { $HIP_LIB_PATH = "$HIP_PATH/lib"; @@ -331,6 +336,7 @@ if ($HIP_PLATFORM eq "clang") { $HIPLDFLAGS = " -Wno-deprecated-gpu-targets -lcuda -lcudart -L$CUDA_PATH/lib64"; } else { printf ("error: unknown HIP_PLATFORM = '$HIP_PLATFORM'"); + printf (" or HIP_COMPILER = '$HIP_COMPILER'"); exit (-1); } @@ -346,7 +352,7 @@ my $hasC = 0; # options contain a c-style file my $hasCXX = 0; # options contain a cpp-style file (NVCC must force recognition as GPU file) my $hasCU = 0; # options contain a cu-style file (HCC must force recognition as GPU file) my $hasHIP = 0; # options contain a hip-style file (HIP-Clang must pass offloading options) -my $needHipHcc = ($HIP_PLATFORM eq 'hcc'); # set if we need to link hip_hcc.o from src tree. (some builds, ie cmake, provide their own) +my $needHipHcc = ($HIP_PLATFORM eq 'hcc' and $HIP_COMPILER eq 'hcc'); # set if we need to link hip_hcc.o from src tree. (some builds, ie cmake, provide their own) my $printHipVersion = 0; # print HIP version my $printCXXFlags = 0; # print HIPCXXFLAGS my $printLDFlags = 0; # print HIPLDFLAGS @@ -370,7 +376,7 @@ if ($verbose & 0x4) { # Handle code object generation my $ISACMD=""; -if($HIP_PLATFORM eq "hcc"){ +if($HIP_PLATFORM eq "hcc" and $HIP_COMPILER eq "hcc"){ $ISACMD .= "$HIP_PATH/bin/lpl "; if($ARGV[0] eq "--genco"){ foreach $isaarg (@ARGV[1..$#ARGV]){ @@ -385,7 +391,7 @@ if($HIP_PLATFORM eq "hcc"){ } } -if(($HIP_PLATFORM eq "hcc")){ +if(($HIP_PLATFORM eq "hcc" and $HIP_COMPILER eq "hcc")){ $ENV{HCC_EXTRA_LIBRARIES}="\n"; } @@ -446,7 +452,7 @@ foreach $arg (@ARGV) $targetsStr .= substr($arg, length($targetOpt)); $default_amdgpu_target = 0; # hip-clang does not accept --amdgpu-target= options. - if ($HIP_PLATFORM eq 'clang') { + if ($HIP_PLATFORM eq "hcc" and $HIP_COMPILER eq "clang") { $swallowArg = 1; } } @@ -459,7 +465,7 @@ foreach $arg (@ARGV) $coFormatv3 = 0; } - if (($arg =~ /--genco/) and $HIP_PLATFORM eq 'clang' ) { + if (($arg =~ /--genco/) and $HIP_PLATFORM eq 'hcc' and $HIP_COMPILER eq 'clang' ) { $arg = "--cuda-device-only"; } @@ -512,7 +518,7 @@ foreach $arg (@ARGV) ## hip-clang in command line. ## ToDo: Remove this after hip-clang switch to lto and lld is able to ## handle clang-offload-bundler bundles. - if ($arg =~ m/^-Wl,@/ and $HIP_PLATFORM eq 'clang') { + if ($arg =~ m/^-Wl,@/ and $HIP_PLATFORM eq 'hcc' and $HIP_COMPILER eq 'clang') { my $file = substr $arg, 5; open my $in, "<:encoding(utf8)", $file or die "$file: $!"; my $new_arg = ""; @@ -568,13 +574,12 @@ foreach $arg (@ARGV) close $out; $arg = "$new_arg -Wl,\@$new_file"; } elsif (($arg =~ m/\.a$/ || $arg =~ m/\.lo$/) && - $HIP_PLATFORM eq 'clang') { + $HIP_PLATFORM eq 'hcc' and $HIP_COMPILER eq 'clang') { ## process static library for hip-clang ## extract object files from static library and pass them directly to ## hip-clang. ## ToDo: Remove this after hip-clang switch to lto and lld is able to ## handle clang-offload-bundler bundles. - my $new_arg = ""; my $tmpdir = get_temp_dir (); my $libFile = $arg; @@ -661,7 +666,7 @@ foreach $arg (@ARGV) } elsif ((($arg =~ /\.cu$/ or $arg =~ /\.cuh$/) and $HIP_COMPILE_CXX_AS_HIP ne '0') or ($arg =~ /\.hip$/)) { $needCXXFLAGS = 1; - if ($HIP_COMPILER eq "clang") { + if ($HIP_PLATFORM eq "hcc" and $HIP_COMPILER eq "clang") { $hasHIP = 1; $toolArgs .= " -x hip"; } else { @@ -675,7 +680,7 @@ foreach $arg (@ARGV) $prevArg = $arg; } -if($HIP_PLATFORM eq "hcc" or $HIP_PLATFORM eq "clang"){ +if($HIP_PLATFORM eq "hcc"){ # No AMDGPU target specified at commandline. So look for HCC_AMDGPU_TARGET if($default_amdgpu_target eq 1) { if (defined $ENV{HCC_AMDGPU_TARGET}) { @@ -692,7 +697,7 @@ if($HIP_PLATFORM eq "hcc" or $HIP_PLATFORM eq "clang"){ # Parse the targets collected in targetStr and set corresponding compiler options. my @targets = split(',', $targetsStr); - if($HIP_PLATFORM eq "hcc") { + if($HIP_COMPILER eq "hcc") { $GPU_ARCH_OPT = " --amdgpu-target="; } else { $GPU_ARCH_OPT = " --cuda-gpu-arch="; @@ -708,7 +713,7 @@ if($HIP_PLATFORM eq "hcc" or $HIP_PLATFORM eq "clang"){ $GPU_ARCH_ARG = $GPU_ARCH_OPT . $val; $HIPLDARCHFLAGS .= $GPU_ARCH_ARG; $HIPCXXFLAGS .= $archMacro; - if ($HIP_PLATFORM eq 'clang' and $hasHIP) { + if ($HIP_COMPILER eq 'clang' and $hasHIP) { $HIPCXXFLAGS .= $GPU_ARCH_ARG; } @@ -729,7 +734,7 @@ if($HIP_PLATFORM eq "hcc" or $HIP_PLATFORM eq "clang"){ # hcc defaults to v2, so we need to convert to the appropriate flag # hip-clang defaults to v3, so we don't need to do anything -if ($coFormatv3 and $HIP_PLATFORM eq 'hcc') { +if ($coFormatv3 and $HIP_PLATFORM eq 'hcc' and $HIP_COMPILER eq 'hcc') { $HIPLDFLAGS .= " -mcode-object-v3"; $HIPCXXFLAGS .= " -mcode-object-v3"; } @@ -737,7 +742,7 @@ if ($coFormatv3 and $HIP_PLATFORM eq 'hcc') { if ($hasCXX and $HIP_PLATFORM eq 'nvcc') { $HIPCXXFLAGS .= " -x cu"; } -if ($hasCU and $HIP_PLATFORM eq 'hcc') { +if ($hasCU and $HIP_PLATFORM eq 'hcc' and $HIP_COMPILER eq 'hcc') { $HIPCXXFLAGS .= " -x c++"; } @@ -746,17 +751,17 @@ if ($buildDeps and $HIP_PLATFORM eq 'nvcc') { $HIPCFLAGS .= " -M -D__CUDACC__"; } -if ($buildDeps and $HIP_PLATFORM eq 'clang') { +if ($buildDeps and $HIP_PLATFORM eq 'hcc' and $HIP_COMPILER eq 'clang') { $HIPCXXFLAGS .= " --cuda-host-only"; } # Add --hip-link only if it is compile only and -fgpu-rdc is on. -if ($rdc and !$compileOnly and $HIP_PLATFORM eq 'clang') { +if ($rdc and !$compileOnly and $HIP_PLATFORM eq 'hcc' and $HIP_COMPILER eq 'clang') { $HIPLDFLAGS .= " --hip-link"; $HIPLDFLAGS .= $HIPLDARCHFLAGS; } -if ($setStdLib eq 0 and $HIP_PLATFORM eq 'hcc') +if ($setStdLib eq 0 and $HIP_PLATFORM eq 'hcc' and $HIP_COMPILER eq 'hcc') { $HIPCXXFLAGS .= $HCC_WA_FLAGS; } @@ -774,7 +779,7 @@ if ($needHipHcc) { # Reason is that NVCC uses the file extension to determine whether to compile in CUDA mode or # pass-through CPP mode. -if ($HIP_PLATFORM eq "clang") { +if ($HIP_PLATFORM eq "hcc" and $HIP_COMPILER eq "clang") { # Set default optimization level to -O3 for hip-clang. if ($optArg eq "") { $HIPCXXFLAGS .= " -O3"; @@ -849,7 +854,7 @@ if ($printLDFlags) { print $HIPLDFLAGS; } if ($runCmd) { - if ($HIP_PLATFORM eq "hcc" and exists($hipConfig{'HCC_VERSION'}) and $HCC_VERSION ne $hipConfig{'HCC_VERSION'}) { + if ($HIP_PLATFORM eq "hcc" and $HIP_COMPILER eq "hcc" and exists($hipConfig{'HCC_VERSION'}) and $HCC_VERSION ne $hipConfig{'HCC_VERSION'}) { print ("HIP ($HIP_PATH) was built using hcc $hipConfig{'HCC_VERSION'}, but you are using $HCC_HOME/hcc with version $HCC_VERSION from hipcc. Please rebuild HIP including cmake or update HCC_HOME variable.\n") ; die unless $ENV{'HIP_IGNORE_HCC_VERSION'}; } From 1f5cc41d6417abe058d9de6da78770419b2cd7b1 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 30 Apr 2020 14:13:43 -0400 Subject: [PATCH 04/33] Find python to generate prof API headers. Don't rely on shell commands. On Ubuntu 20.04, /usr/bin/python no longer exists, so I'm seeing some failures from somewhere assuming the path. The top level CMakeLists also repeats exactly this, which should be fixed. Change-Id: I56b26742920f0dc40b363b409892bd41cfa485ef --- vdi/CMakeLists.txt | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/vdi/CMakeLists.txt b/vdi/CMakeLists.txt index 5283748724..8908159d12 100644 --- a/vdi/CMakeLists.txt +++ b/vdi/CMakeLists.txt @@ -13,6 +13,8 @@ set (CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) set(LIB_INSTALL_DIR ${CMAKE_INSTALL_PREFIX}/lib) set(CONFIG_PACKAGE_INSTALL_DIR ${LIB_INSTALL_DIR}/cmake/hip) +find_package(PythonInterp REQUIRED) + add_definitions(-D__HIP_VDI__ -D__HIP_PLATFORM_HCC__ -DLINUX -D__x86_64__ -D__AMD64__ -DUNIX_OS -DqLittleEndian -DOPENCL_MAJOR=2 -DOPENCL_MINOR=0 -DCL_TARGET_OPENCL_VERSION=220 -DWITH_AQL -DWITH_ONLINE_COMPILER -DATI_OS_LINUX -DATI_ARCH_X86 -DLITTLEENDIAN_CPU -DATI_BITS_64 -DATI_COMP_GCC -DWITH_HSA_DEVICE -DWITH_TARGET_AMDGCN -DOPENCL_EXPORTS -DCL_USE_DEPRECATED_OPENCL_1_0_APIS -DCL_USE_DEPRECATED_OPENCL_1_1_APIS -DCL_USE_DEPRECATED_OPENCL_1_2_APIS -DCL_USE_DEPRECATED_OPENCL_2_0_APIS -DVEGA10_ONLY=false -DWITH_LIGHTNING_COMPILER -DUSE_PROF_API) if(CMAKE_BUILD_TYPE MATCHES "^Debug$") @@ -46,15 +48,19 @@ set(PROF_API_HEADER_PATH ${VDI_DIR}/platform) # Profiling API support ############################# # Generate profiling API macros/structures header +# FIXME: This should not be writing to the source directory set(PROF_API_STR "${CMAKE_CURRENT_SOURCE_DIR}/../include/hip/hcc_detail/hip_prof_str.h") set(PROF_API_HDR "${CMAKE_CURRENT_SOURCE_DIR}/../include/hip/hcc_detail/hip_runtime_api.h") set(PROF_API_SRC "${CMAKE_CURRENT_SOURCE_DIR}") set(PROF_API_GEN "${CMAKE_CURRENT_SOURCE_DIR}/hip_prof_gen.py") set(PROF_API_LOG "${PROJECT_BINARY_DIR}/hip_prof_gen.log.txt") -set(PROF_API_CMD "${PROF_API_GEN} -v -t --priv ${OPT_PROF_API} ${PROF_API_HDR} ${PROF_API_SRC} ${PROF_API_STR} >${PROF_API_LOG}") -MESSAGE(STATUS "Generating profiling promitives: ${PROF_API_STR}") -execute_process(COMMAND sh -c "rm -f ${PROF_API_STR}; ${PROF_API_CMD}") -#MESSAGE(COMMAND sh -c "rm -f ${PROF_API_STR}; ${PROF_API_CMD}") +message(STATUS "Generating profiling primitives: ${PROF_API_STR}") + +# FIXME: Do we really need to remove this file first? +execute_process( + COMMAND ${CMAKE_COMMAND} -E remove -f ${PROF_API_STR} + COMMAND ${PYTHON_EXECUTABLE} ${PROF_API_GEN} -v -t --priv ${OPT_PROF_API} ${PROF_API_HDR} ${PROF_API_SRC} ${PROF_API_STR} + OUTPUT_FILE ${PROF_API_LOG}) set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${PROF_API_GEN} ${PROF_API_HDR} ${PROF_API_STR}) # Enable profiling API From b69e33038b1bc48f13094dcf99fb2bb5d71437cb Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 30 Apr 2020 15:02:23 -0400 Subject: [PATCH 05/33] Don't emit generated hip_prof_str.h to source directory The build should never touch the source directory, and only write generated files to the output directory. Also track the dependency with a custom command and target, as is the normal method for dependencies on generated files. Change-Id: I9d835256c643aeef241d26ca05ab390ebba65111 --- .gitignore | 1 - CMakeLists.txt | 6 ++++++ vdi/CMakeLists.txt | 32 ++++++++++++++++++++++---------- vdi/hip_prof_gen.py | 6 ++++++ 4 files changed, 34 insertions(+), 11 deletions(-) diff --git a/.gitignore b/.gitignore index 4bea5cb95c..64cdd493a6 100644 --- a/.gitignore +++ b/.gitignore @@ -10,7 +10,6 @@ bin/hipInfo bin/hipBusBandwidth bin/hipDispatchLatency bin/hipify-clang -include/hip/hcc_detail/hip_prof_str.h include/hip/hip_version.h tags samples/1_Utils/hipInfo/hipInfo diff --git a/CMakeLists.txt b/CMakeLists.txt index 7fbb5f4739..609ca5aec8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -459,6 +459,12 @@ if(NOT ${INSTALL_SOURCE} EQUAL 0) install(DIRECTORY cmake DESTINATION .) endif() +# Install generated headers +# FIXME: Associate with individual targets. +install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include + DESTINATION include + FILES_MATCHING PATTERN "*.h*") + ############################# # hip-config ############################# diff --git a/vdi/CMakeLists.txt b/vdi/CMakeLists.txt index 8908159d12..a91d6d064c 100644 --- a/vdi/CMakeLists.txt +++ b/vdi/CMakeLists.txt @@ -49,19 +49,21 @@ set(PROF_API_HEADER_PATH ${VDI_DIR}/platform) ############################# # Generate profiling API macros/structures header # FIXME: This should not be writing to the source directory -set(PROF_API_STR "${CMAKE_CURRENT_SOURCE_DIR}/../include/hip/hcc_detail/hip_prof_str.h") +set(PROF_API_STR "${PROJECT_BINARY_DIR}/include/hip/hcc_detail/hip_prof_str.h") set(PROF_API_HDR "${CMAKE_CURRENT_SOURCE_DIR}/../include/hip/hcc_detail/hip_runtime_api.h") set(PROF_API_SRC "${CMAKE_CURRENT_SOURCE_DIR}") set(PROF_API_GEN "${CMAKE_CURRENT_SOURCE_DIR}/hip_prof_gen.py") set(PROF_API_LOG "${PROJECT_BINARY_DIR}/hip_prof_gen.log.txt") -message(STATUS "Generating profiling primitives: ${PROF_API_STR}") -# FIXME: Do we really need to remove this file first? -execute_process( - COMMAND ${CMAKE_COMMAND} -E remove -f ${PROF_API_STR} +add_custom_command(OUTPUT ${PROF_API_STR} COMMAND ${PYTHON_EXECUTABLE} ${PROF_API_GEN} -v -t --priv ${OPT_PROF_API} ${PROF_API_HDR} ${PROF_API_SRC} ${PROF_API_STR} - OUTPUT_FILE ${PROF_API_LOG}) -set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${PROF_API_GEN} ${PROF_API_HDR} ${PROF_API_STR}) + OUTPUT_FILE ${PROF_API_LOG} + DEPENDS ${PROF_API_HDR} ${PROF_API_GEN} + COMMENT "Generating profiling primitives: ${PROF_API_STR}") + +add_custom_target(gen-prof-api-str-header ALL + DEPENDS ${PROF_API_STR} + SOURCES ${PROF_API_HDR}) # Enable profiling API if(USE_PROF_API EQUAL 1) @@ -98,6 +100,7 @@ endif() # if (DEFINED LLVM_INCLUDES AND NOT ${LLVM_INCLUDES} STREQUAL "") include_directories(${CMAKE_SOURCE_DIR}) include_directories(${CMAKE_SOURCE_DIR}/include) +include_directories(${PROJECT_BINARY_DIR}/include) include_directories(${CMAKE_SOURCE_DIR}/elfio) include_directories(${CMAKE_SOURCE_DIR}/amdocl) include_directories(${CMAKE_SOURCE_DIR}/include/hip/hcc_detail/elfio) @@ -121,7 +124,7 @@ include_directories("$" add_definitions(-DBSD_LIBELF) -add_library(hip64 OBJECT +add_library(hip64 OBJECT hip_context.cpp hip_device.cpp hip_device_runtime.cpp @@ -143,12 +146,14 @@ add_library(hip64 OBJECT fixme.cpp ) set_target_properties(hip64 PROPERTIES POSITION_INDEPENDENT_CODE ON) + set_target_properties( hip64 PROPERTIES CXX_STANDARD 14 CXX_STANDARD_REQUIRED ON CXX_EXTENSIONS OFF -) + ) +add_dependencies(hip64 gen-prof-api-str-header) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) @@ -166,7 +171,14 @@ set_target_properties( amdhip64 PROPERTIES VERSION ${HIP_LIB_VERSION_STRING} SOVERSION ${HIP_LIB_VERSION_MAJOR} -) + ) + +set_target_properties(hip64 PROPERTIES PUBLIC_HEADER ${PROF_API_STR}) +set_target_properties(amdhip64 PROPERTIES PUBLIC_HEADER ${PROF_API_STR}) +set_target_properties(amdhip64_static PROPERTIES PUBLIC_HEADER ${PROF_API_STR}) + + + # We expect amdhip64_static to contain objects of rocclr and hip. But linker # let amdhip64_static contain objects of hip only. So we will use a # a custom amdhip64_static_combiner to combine objects of vid and hip into diff --git a/vdi/hip_prof_gen.py b/vdi/hip_prof_gen.py index 2eb10e9ca3..c20df3c1aa 100755 --- a/vdi/hip_prof_gen.py +++ b/vdi/hip_prof_gen.py @@ -603,6 +603,12 @@ if len(opts_map) != 0: if not_found != 0: error(str(not_found) + " API calls missing in interception layer") +# The output subdirectory seems to exist or not depending on the +# version of cmake. +output_dir = os.path.dirname(OUTPUT) +if not os.path.exists(output_dir): + os.makedirs(output_dir) + # Generating output header file with open(OUTPUT, 'w') as f: generate_prof_header(f, api_map, opts_map) From 253962c9c4b5a8dacd30a3d3836070d0b83e3b3a Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 30 Apr 2020 13:03:27 -0400 Subject: [PATCH 06/33] Don't directly link pthreads This was already searching for the right threads library, but ignoring the result. Change-Id: I10d898245696135c1ef928c7715efce8ec6b939f --- vdi/CMakeLists.txt | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/vdi/CMakeLists.txt b/vdi/CMakeLists.txt index a91d6d064c..5defe45ac1 100644 --- a/vdi/CMakeLists.txt +++ b/vdi/CMakeLists.txt @@ -195,9 +195,11 @@ target_link_libraries(device INTERFACE host) # TODO: we may create host_static and device_static to let app # link amdhip64_static -target_link_libraries(amdhip64 PRIVATE amdvdi_static pthread dl) -target_link_libraries(amdhip64_static PRIVATE pthread dl) -target_link_libraries(amdhip64_static_temp PRIVATE pthread dl) +# FIXME: Linux convention is to create static library with same base +# filename. +target_link_libraries(amdhip64 PRIVATE amdvdi_static Threads::Threads dl) +target_link_libraries(amdhip64_static PRIVATE Threads::Threads dl) +target_link_libraries(amdhip64_static_temp PRIVATE Threads::Threads dl) # combine objects of vid and hip into amdhip64_static add_custom_target( From 64507de69449010bd2254321f1e54f48c3bbd0a3 Mon Sep 17 00:00:00 2001 From: Michael LIAO Date: Fri, 1 May 2020 12:18:52 -0400 Subject: [PATCH 07/33] Fix more typos from 5429b40afeac311a48df31adb01b2dc37d3b11fd. Change-Id: I75ed28a5862daffc0778910d7ba3b97f51a87949 --- include/hip/hcc_detail/hip_runtime_api.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/hip/hcc_detail/hip_runtime_api.h b/include/hip/hcc_detail/hip_runtime_api.h index 4ab66477fb..0779c64db7 100644 --- a/include/hip/hcc_detail/hip_runtime_api.h +++ b/include/hip/hcc_detail/hip_runtime_api.h @@ -3944,7 +3944,7 @@ static inline hipError_t hipBindTextureToArray( { struct hipChannelFormatDesc desc; hipError_t err = hipGetChannelDesc(&desc, array); - return (err == hipSuccess) ? hipBindTextureToArray(&tex, array, desc) : err; + return (err == hipSuccess) ? hipBindTextureToArray(&tex, array, &desc) : err; } template @@ -3968,7 +3968,7 @@ static inline hipError_t hipBindTextureToMipmappedArray( return err; } err = hipGetChannelDesc(&desc, levelArray); - return (err == hipSuccess) ? hipBindTextureToMipmappedArray(&tex, mipmappedArray, desc) : err; + return (err == hipSuccess) ? hipBindTextureToMipmappedArray(&tex, mipmappedArray, &desc) : err; } template From 9b39b9541716e0ceb8b1068e192fbd6e09b78db3 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 1 May 2020 12:43:49 -0400 Subject: [PATCH 08/33] Fix installing generated header to include/include This isn't the ideal solution, but a lot more cleanups are needed for how install is used. Change-Id: I63a9c1a46d0da13ee4373038ece228005207271a --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 609ca5aec8..d208c19fb5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -461,7 +461,7 @@ endif() # Install generated headers # FIXME: Associate with individual targets. -install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include +install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/hip DESTINATION include FILES_MATCHING PATTERN "*.h*") From 1468982f8adb6b6c24a00090c935f4040d612c1b Mon Sep 17 00:00:00 2001 From: agodavar Date: Wed, 29 Apr 2020 05:41:12 -0400 Subject: [PATCH 09/33] find_package(hip) changes for clang Change-Id: I9dc1be1dd5f9424e26ebcd2961207e1c4467550f --- bin/hipcc_cmake_linker_helper | 7 +- cmake/FindHIP.cmake | 138 +++++++++++++----- cmake/FindHIP/run_hipcc.cmake | 19 ++- .../CMakeLists.txt | 3 +- .../12_cmake_hip_add_executable/Readme.md | 4 +- tests/README.md | 5 +- tests/hit/HIT.cmake | 28 +++- tests/src/deviceLib/hipMathFunctions.cpp | 2 +- 8 files changed, 155 insertions(+), 51 deletions(-) diff --git a/bin/hipcc_cmake_linker_helper b/bin/hipcc_cmake_linker_helper index bd4f6c118d..4870531881 100755 --- a/bin/hipcc_cmake_linker_helper +++ b/bin/hipcc_cmake_linker_helper @@ -2,4 +2,9 @@ SOURCE="${BASH_SOURCE[0]}" HIP_PATH="$( command cd -P "$( dirname "$SOURCE" )/.." && pwd )" -HCC_HOME=$1 $HIP_PATH/bin/hipcc "${@:2}" +HIP_COMPILER=$(eval "$HIP_PATH/bin/hipconfig --compiler") +if [ "$HIP_COMPILER" = "hcc" ]; then + HCC_HOME=$1 $HIP_PATH/bin/hipcc "${@:2}" +elif [ "$HIP_COMPILER" = "clang" ]; then + HIP_CLANG_PATH=$1 $HIP_PATH/bin/hipcc "${@:2}" +fi \ No newline at end of file diff --git a/cmake/FindHIP.cmake b/cmake/FindHIP.cmake index 0819a0364c..2331a31347 100644 --- a/cmake/FindHIP.cmake +++ b/cmake/FindHIP.cmake @@ -8,42 +8,23 @@ # User defined flags set(HIP_HIPCC_FLAGS "" CACHE STRING "Semicolon delimited flags for HIPCC") set(HIP_HCC_FLAGS "" CACHE STRING "Semicolon delimited flags for HCC") +set(HIP_CLANG_FLAGS "" CACHE STRING "Semicolon delimited flags for CLANG") set(HIP_NVCC_FLAGS "" CACHE STRING "Semicolon delimted flags for NVCC") -mark_as_advanced(HIP_HIPCC_FLAGS HIP_HCC_FLAGS HIP_NVCC_FLAGS) +mark_as_advanced(HIP_HIPCC_FLAGS HIP_HCC_FLAGS HIP_CLANG_FLAGS HIP_NVCC_FLAGS) set(_hip_configuration_types ${CMAKE_CONFIGURATION_TYPES} ${CMAKE_BUILD_TYPE} Debug MinSizeRel Release RelWithDebInfo) list(REMOVE_DUPLICATES _hip_configuration_types) foreach(config ${_hip_configuration_types}) string(TOUPPER ${config} config_upper) set(HIP_HIPCC_FLAGS_${config_upper} "" CACHE STRING "Semicolon delimited flags for HIPCC") set(HIP_HCC_FLAGS_${config_upper} "" CACHE STRING "Semicolon delimited flags for HCC") + set(HIP_CLANG_FLAGS_${config_upper} "" CACHE STRING "Semicolon delimited flags for CLANG") set(HIP_NVCC_FLAGS_${config_upper} "" CACHE STRING "Semicolon delimited flags for NVCC") - mark_as_advanced(HIP_HIPCC_FLAGS_${config_upper} HIP_HCC_FLAGS_${config_upper} HIP_NVCC_FLAGS_${config_upper}) + mark_as_advanced(HIP_HIPCC_FLAGS_${config_upper} HIP_HCC_FLAGS_${config_upper} HIP_CLANG_FLAGS_${config_upper} HIP_NVCC_FLAGS_${config_upper}) endforeach() option(HIP_HOST_COMPILATION_CPP "Host code compilation mode" ON) option(HIP_VERBOSE_BUILD "Print out the commands run while compiling the HIP source file. With the Makefile generator this defaults to VERBOSE variable specified on the command line, but can be forced on with this option." OFF) mark_as_advanced(HIP_HOST_COMPILATION_CPP) -############################################################################### -# Set HIP CMAKE Flags -############################################################################### -# Copy the invocation styles from CXX to HIP -set(CMAKE_HIP_ARCHIVE_CREATE ${CMAKE_CXX_ARCHIVE_CREATE}) -set(CMAKE_HIP_ARCHIVE_APPEND ${CMAKE_CXX_ARCHIVE_APPEND}) -set(CMAKE_HIP_ARCHIVE_FINISH ${CMAKE_CXX_ARCHIVE_FINISH}) -set(CMAKE_SHARED_LIBRARY_SONAME_HIP_FLAG ${CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG}) -set(CMAKE_SHARED_LIBRARY_CREATE_HIP_FLAGS ${CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS}) -set(CMAKE_SHARED_LIBRARY_HIP_FLAGS ${CMAKE_SHARED_LIBRARY_CXX_FLAGS}) -#set(CMAKE_SHARED_LIBRARY_LINK_HIP_FLAGS ${CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS}) -set(CMAKE_SHARED_LIBRARY_RUNTIME_HIP_FLAG ${CMAKE_SHARED_LIBRARY_RUNTIME_CXX_FLAG}) -set(CMAKE_SHARED_LIBRARY_RUNTIME_HIP_FLAG_SEP ${CMAKE_SHARED_LIBRARY_RUNTIME_CXX_FLAG_SEP}) -set(CMAKE_SHARED_LIBRARY_LINK_STATIC_HIP_FLAGS ${CMAKE_SHARED_LIBRARY_LINK_STATIC_CXX_FLAGS}) -set(CMAKE_SHARED_LIBRARY_LINK_DYNAMIC_HIP_FLAGS ${CMAKE_SHARED_LIBRARY_LINK_DYNAMIC_CXX_FLAGS}) - -# Set the CMake Flags to use the HCC Compilier. -set(CMAKE_HIP_CREATE_SHARED_LIBRARY "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_PATH} -o ") -set(CMAKE_HIP_CREATE_SHARED_MODULE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_PATH} -o -shared" ) -set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_PATH} -o ") - ############################################################################### # FIND: HIP and associated helper binaries ############################################################################### @@ -168,6 +149,28 @@ if(UNIX AND NOT APPLE AND NOT CYGWIN) set(HIP_PLATFORM ${_hip_platform} CACHE STRING "HIP platform as computed by hipconfig") mark_as_advanced(HIP_PLATFORM) endif() + + if(HIP_HIPCONFIG_EXECUTABLE AND NOT HIP_COMPILER) + # Compute the compiler + execute_process( + COMMAND ${HIP_HIPCONFIG_EXECUTABLE} --compiler + OUTPUT_VARIABLE _hip_compiler + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + set(HIP_COMPILER ${_hip_compiler} CACHE STRING "HIP compiler as computed by hipconfig") + mark_as_advanced(HIP_COMPILER) + endif() + + if(HIP_HIPCONFIG_EXECUTABLE AND NOT HIP_RUNTIME) + # Compute the runtime + execute_process( + COMMAND ${HIP_HIPCONFIG_EXECUTABLE} --runtime + OUTPUT_VARIABLE _hip_runtime + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + set(HIP_RUNTIME ${_hip_runtime} CACHE STRING "HIP runtime as computed by hipconfig") + mark_as_advanced(HIP_RUNTIME) + endif() endif() include(FindPackageHandleStandardArgs) @@ -178,9 +181,39 @@ find_package_handle_standard_args( HIP_HIPCC_EXECUTABLE HIP_HIPCONFIG_EXECUTABLE HIP_PLATFORM + HIP_COMPILER + HIP_RUNTIME VERSION_VAR HIP_VERSION ) +############################################################################### +# Set HIP CMAKE Flags +############################################################################### +# Copy the invocation styles from CXX to HIP +set(CMAKE_HIP_ARCHIVE_CREATE ${CMAKE_CXX_ARCHIVE_CREATE}) +set(CMAKE_HIP_ARCHIVE_APPEND ${CMAKE_CXX_ARCHIVE_APPEND}) +set(CMAKE_HIP_ARCHIVE_FINISH ${CMAKE_CXX_ARCHIVE_FINISH}) +set(CMAKE_SHARED_LIBRARY_SONAME_HIP_FLAG ${CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG}) +set(CMAKE_SHARED_LIBRARY_CREATE_HIP_FLAGS ${CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS}) +set(CMAKE_SHARED_LIBRARY_HIP_FLAGS ${CMAKE_SHARED_LIBRARY_CXX_FLAGS}) +#set(CMAKE_SHARED_LIBRARY_LINK_HIP_FLAGS ${CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS}) +set(CMAKE_SHARED_LIBRARY_RUNTIME_HIP_FLAG ${CMAKE_SHARED_LIBRARY_RUNTIME_CXX_FLAG}) +set(CMAKE_SHARED_LIBRARY_RUNTIME_HIP_FLAG_SEP ${CMAKE_SHARED_LIBRARY_RUNTIME_CXX_FLAG_SEP}) +set(CMAKE_SHARED_LIBRARY_LINK_STATIC_HIP_FLAGS ${CMAKE_SHARED_LIBRARY_LINK_STATIC_CXX_FLAGS}) +set(CMAKE_SHARED_LIBRARY_LINK_DYNAMIC_HIP_FLAGS ${CMAKE_SHARED_LIBRARY_LINK_DYNAMIC_CXX_FLAGS}) + +if("${HIP_COMPILER}" STREQUAL "hcc") + # Set the CMake Flags to use the HCC Compiler. + set(CMAKE_HIP_CREATE_SHARED_LIBRARY "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_HOME} -o ") + set(CMAKE_HIP_CREATE_SHARED_MODULE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_HOME} -o -shared" ) + set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_HOME} -o ") +elseif("${HIP_COMPILER}" STREQUAL "clang") + # Set the CMake Flags to use the HIP-Clang Compiler. + set(CMAKE_HIP_CREATE_SHARED_LIBRARY "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HIP_CLANG_PATH} -o ") + set(CMAKE_HIP_CREATE_SHARED_MODULE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HIP_CLANG_PATH} -o -shared" ) + set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HIP_CLANG_PATH} -o ") +endif() + ############################################################################### # MACRO: Locate helper files ############################################################################### @@ -213,11 +246,13 @@ hip_find_helper_file(run_hipcc cmake) macro(HIP_RESET_FLAGS) unset(HIP_HIPCC_FLAGS) unset(HIP_HCC_FLAGS) + unset(HIP_CLANG_FLAGS) unset(HIP_NVCC_FLAGS) foreach(config ${_hip_configuration_types}) string(TOUPPER ${config} config_upper) unset(HIP_HIPCC_FLAGS_${config_upper}) unset(HIP_HCC_FLAGS_${config_upper}) + unset(HIP_CLANG_FLAGS_${config_upper}) unset(HIP_NVCC_FLAGS_${config_upper}) endforeach() endmacro() @@ -225,27 +260,37 @@ endmacro() ############################################################################### # MACRO: Separate the options from the sources ############################################################################### -macro(HIP_GET_SOURCES_AND_OPTIONS _sources _cmake_options _hipcc_options _hcc_options _nvcc_options) +macro(HIP_GET_SOURCES_AND_OPTIONS _sources _cmake_options _hipcc_options _hcc_options _clang_options _nvcc_options) set(${_sources}) set(${_cmake_options}) set(${_hipcc_options}) set(${_hcc_options}) + set(${_clang_options}) set(${_nvcc_options}) set(_hipcc_found_options FALSE) set(_hcc_found_options FALSE) + set(_clang_found_options FALSE) set(_nvcc_found_options FALSE) foreach(arg ${ARGN}) if("x${arg}" STREQUAL "xHIPCC_OPTIONS") set(_hipcc_found_options TRUE) set(_hcc_found_options FALSE) + set(_clang_found_options FALSE) set(_nvcc_found_options FALSE) elseif("x${arg}" STREQUAL "xHCC_OPTIONS") set(_hipcc_found_options FALSE) set(_hcc_found_options TRUE) + set(_clang_found_options FALSE) + set(_nvcc_found_options FALSE) + elseif("x${arg}" STREQUAL "xCLANG_OPTIONS") + set(_hipcc_found_options FALSE) + set(_hcc_found_options FALSE) + set(_clang_found_options TRUE) set(_nvcc_found_options FALSE) elseif("x${arg}" STREQUAL "xNVCC_OPTIONS") set(_hipcc_found_options FALSE) set(_hcc_found_options FALSE) + set(_clang_found_options FALSE) set(_nvcc_found_options TRUE) elseif( "x${arg}" STREQUAL "xEXCLUDE_FROM_ALL" OR @@ -259,6 +304,8 @@ macro(HIP_GET_SOURCES_AND_OPTIONS _sources _cmake_options _hipcc_options _hcc_op list(APPEND ${_hipcc_options} ${arg}) elseif(_hcc_found_options) list(APPEND ${_hcc_options} ${arg}) + elseif(_clang_found_options) + list(APPEND ${_clang_options} ${arg}) elseif(_nvcc_found_options) list(APPEND ${_nvcc_options} ${arg}) else() @@ -392,9 +439,10 @@ macro(HIP_PREPARE_TARGET_COMMANDS _target _format _generated_files _source_files endforeach() endif() - HIP_GET_SOURCES_AND_OPTIONS(_hip_sources _hip_cmake_options _hipcc_options _hcc_options _nvcc_options ${ARGN}) + HIP_GET_SOURCES_AND_OPTIONS(_hip_sources _hip_cmake_options _hipcc_options _hcc_options _clang_options _nvcc_options ${ARGN}) HIP_PARSE_HIPCC_OPTIONS(HIP_HIPCC_FLAGS ${_hipcc_options}) HIP_PARSE_HIPCC_OPTIONS(HIP_HCC_FLAGS ${_hcc_options}) + HIP_PARSE_HIPCC_OPTIONS(HIP_CLANG_FLAGS ${_clang_options}) HIP_PARSE_HIPCC_OPTIONS(HIP_NVCC_FLAGS ${_nvcc_options}) # Add the compile definitions @@ -416,6 +464,7 @@ macro(HIP_PREPARE_TARGET_COMMANDS _target _format _generated_files _source_files # If we are building a shared library, add extra flags to HIP_HIPCC_FLAGS if(_hip_build_shared_libs) list(APPEND HIP_HCC_FLAGS "-fPIC") + list(APPEND HIP_CLANG_FLAGS "-fPIC") list(APPEND HIP_NVCC_FLAGS "--shared -Xcompiler '-fPIC'") endif() @@ -426,12 +475,14 @@ macro(HIP_PREPARE_TARGET_COMMANDS _target _format _generated_files _source_files set(_HIP_HOST_FLAGS "set(CMAKE_HOST_FLAGS ${CMAKE_${HIP_C_OR_CXX}_FLAGS})") set(_HIP_HIPCC_FLAGS "set(HIP_HIPCC_FLAGS ${HIP_HIPCC_FLAGS})") set(_HIP_HCC_FLAGS "set(HIP_HCC_FLAGS ${HIP_HCC_FLAGS})") + set(_HIP_CLANG_FLAGS "set(HIP_CLANG_FLAGS ${HIP_CLANG_FLAGS})") set(_HIP_NVCC_FLAGS "set(HIP_NVCC_FLAGS ${HIP_NVCC_FLAGS})") foreach(config ${_hip_configuration_types}) string(TOUPPER ${config} config_upper) set(_HIP_HOST_FLAGS "${_HIP_HOST_FLAGS}\nset(CMAKE_HOST_FLAGS_${config_upper} ${CMAKE_${HIP_C_OR_CXX}_FLAGS_${config_upper}})") set(_HIP_HIPCC_FLAGS "${_HIP_HIPCC_FLAGS}\nset(HIP_HIPCC_FLAGS_${config_upper} ${HIP_HIPCC_FLAGS_${config_upper}})") set(_HIP_HCC_FLAGS "${_HIP_HCC_FLAGS}\nset(HIP_HCC_FLAGS_${config_upper} ${HIP_HCC_FLAGS_${config_upper}})") + set(_HIP_CLANG_FLAGS "${_HIP_CLANG_FLAGS}\nset(HIP_CLANG_FLAGS_${config_upper} ${HIP_CLANG_FLAGS_${config_upper}})") set(_HIP_NVCC_FLAGS "${_HIP_NVCC_FLAGS}\nset(HIP_NVCC_FLAGS_${config_upper} ${HIP_NVCC_FLAGS_${config_upper}})") endforeach() @@ -537,21 +588,34 @@ endmacro() ############################################################################### macro(HIP_ADD_EXECUTABLE hip_target) # Separate the sources from the options - HIP_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _hipcc_options _hcc_options _nvcc_options ${ARGN}) - HIP_PREPARE_TARGET_COMMANDS(${hip_target} OBJ _generated_files _source_files ${_sources} HIPCC_OPTIONS ${_hipcc_options} HCC_OPTIONS ${_hcc_options} NVCC_OPTIONS ${_nvcc_options}) + HIP_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _hipcc_options _hcc_options _clang_options _nvcc_options ${ARGN}) + HIP_PREPARE_TARGET_COMMANDS(${hip_target} OBJ _generated_files _source_files ${_sources} HIPCC_OPTIONS ${_hipcc_options} HCC_OPTIONS ${_hcc_options} CLANG_OPTIONS ${_clang_options} NVCC_OPTIONS ${_nvcc_options}) if(_source_files) list(REMOVE_ITEM _sources ${_source_files}) endif() - if("x${HCC_HOME}" STREQUAL "x") - if (DEFINED $ENV{ROCM_PATH}) - set(HCC_HOME "$ENV{ROCM_PATH}/hcc") - elseif( DEFINED $ENV{HIP_PATH}) - set(HCC_HOME "$ENV{HIP_PATH}/../hcc") - else() - set(HCC_HOME "/opt/rocm/hcc") + if("${HIP_COMPILER}" STREQUAL "hcc") + if("x${HCC_HOME}" STREQUAL "x") + if (DEFINED $ENV{ROCM_PATH}) + set(HCC_HOME "$ENV{ROCM_PATH}/hcc") + elseif( DEFINED $ENV{HIP_PATH}) + set(HCC_HOME "$ENV{HIP_PATH}/../hcc") + else() + set(HCC_HOME "/opt/rocm/hcc") + endif() endif() + set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_HOME} -o ") + elseif("${HIP_COMPILER}" STREQUAL "clang") + if("x${HIP_CLANG_PATH}" STREQUAL "x") + if (DEFINED $ENV{ROCM_PATH}) + set(HIP_CLANG_PATH "$ENV{ROCM_PATH}/llvm/bin") + elseif( DEFINED $ENV{HIP_PATH}) + set(HIP_CLANG_PATH "$ENV{HIP_PATH}/../llvm/bin") + else() + set(HIP_CLANG_PATH "/opt/rocm/llvm/bin") + endif() + endif() + set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HIP_CLANG_PATH} -o ") endif() - set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_HOME} -o ") add_executable(${hip_target} ${_cmake_options} ${_generated_files} ${_sources}) set_target_properties(${hip_target} PROPERTIES LINKER_LANGUAGE HIP) endmacro() @@ -561,8 +625,8 @@ endmacro() ############################################################################### macro(HIP_ADD_LIBRARY hip_target) # Separate the sources from the options - HIP_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _hipcc_options _hcc_options _nvcc_options ${ARGN}) - HIP_PREPARE_TARGET_COMMANDS(${hip_target} OBJ _generated_files _source_files ${_sources} ${_cmake_options} HIPCC_OPTIONS ${_hipcc_options} HCC_OPTIONS ${_hcc_options} NVCC_OPTIONS ${_nvcc_options}) + HIP_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _hipcc_options _hcc_options _clang_options _nvcc_options ${ARGN}) + HIP_PREPARE_TARGET_COMMANDS(${hip_target} OBJ _generated_files _source_files ${_sources} ${_cmake_options} HIPCC_OPTIONS ${_hipcc_options} HCC_OPTIONS ${_hcc_options} CLANG_OPTIONS ${_clang_options} NVCC_OPTIONS ${_nvcc_options}) if(_source_files) list(REMOVE_ITEM _sources ${_source_files}) endif() diff --git a/cmake/FindHIP/run_hipcc.cmake b/cmake/FindHIP/run_hipcc.cmake index 4dc2572e98..01add26bcf 100644 --- a/cmake/FindHIP/run_hipcc.cmake +++ b/cmake/FindHIP/run_hipcc.cmake @@ -27,10 +27,12 @@ set(HIP_HOST_COMPILER "@HIP_HOST_COMPILER@") # path set(CMAKE_COMMAND "@CMAKE_COMMAND@") # path set(HIP_run_make2cmake "@HIP_run_make2cmake@") # path set(HCC_HOME "@HCC_HOME@") #path +set(HIP_CLANG_PATH "@HIP_CLANG_PATH@") #path @HIP_HOST_FLAGS@ @_HIP_HIPCC_FLAGS@ @_HIP_HCC_FLAGS@ +@_HIP_CLANG_FLAGS@ @_HIP_NVCC_FLAGS@ set(HIP_HIPCC_INCLUDE_ARGS "@HIP_HIPCC_INCLUDE_ARGS@") # list (needs to be in quotes to handle spaces properly) @@ -40,13 +42,22 @@ set(host_flag "@host_flag@") # bool # Determine compiler and compiler flags execute_process(COMMAND ${HIP_HIPCONFIG_EXECUTABLE} --platform OUTPUT_VARIABLE HIP_PLATFORM OUTPUT_STRIP_TRAILING_WHITESPACE) +execute_process(COMMAND ${HIP_HIPCONFIG_EXECUTABLE} --compiler OUTPUT_VARIABLE HIP_COMPILER OUTPUT_STRIP_TRAILING_WHITESPACE) +execute_process(COMMAND ${HIP_HIPCONFIG_EXECUTABLE} --runtime OUTPUT_VARIABLE HIP_RUNTIME OUTPUT_STRIP_TRAILING_WHITESPACE) if(NOT host_flag) set(__CC ${HIP_HIPCC_EXECUTABLE}) - if(HIP_PLATFORM STREQUAL "hcc") - if(NOT "x${HCC_HOME}" STREQUAL "x") - set(ENV{HCC_HOME} ${HCC_HOME}) + if("${HIP_PLATFORM}" STREQUAL "hcc") + if("${HIP_COMPILER}" STREQUAL "hcc") + if(NOT "x${HCC_HOME}" STREQUAL "x") + set(ENV{HCC_HOME} ${HCC_HOME}) + endif() + set(__CC_FLAGS ${HIP_HIPCC_FLAGS} ${HIP_HCC_FLAGS} ${HIP_HIPCC_FLAGS_${build_configuration}} ${HIP_HCC_FLAGS_${build_configuration}}) + elseif("${HIP_COMPILER}" STREQUAL "clang") + if(NOT "x${HIP_CLANG_PATH}" STREQUAL "x") + set(ENV{HIP_CLANG_PATH} ${HIP_CLANG_PATH}) + endif() + set(__CC_FLAGS ${HIP_HIPCC_FLAGS} ${HIP_CLANG_FLAGS} ${HIP_HIPCC_FLAGS_${build_configuration}} ${HIP_CLANG_FLAGS_${build_configuration}}) endif() - set(__CC_FLAGS ${HIP_HIPCC_FLAGS} ${HIP_HCC_FLAGS} ${HIP_HIPCC_FLAGS_${build_configuration}} ${HIP_HCC_FLAGS_${build_configuration}}) else() set(__CC_FLAGS ${HIP_HIPCC_FLAGS} ${HIP_NVCC_FLAGS} ${HIP_HIPCC_FLAGS_${build_configuration}} ${HIP_NVCC_FLAGS_${build_configuration}}) endif() diff --git a/samples/2_Cookbook/12_cmake_hip_add_executable/CMakeLists.txt b/samples/2_Cookbook/12_cmake_hip_add_executable/CMakeLists.txt index 99409724d3..c2a6d60cf3 100644 --- a/samples/2_Cookbook/12_cmake_hip_add_executable/CMakeLists.txt +++ b/samples/2_Cookbook/12_cmake_hip_add_executable/CMakeLists.txt @@ -22,7 +22,8 @@ set(MY_SOURCE_FILES MatrixTranspose.cpp) set(MY_TARGET_NAME MatrixTranspose) set(MY_HIPCC_OPTIONS) set(MY_HCC_OPTIONS) +set(MY_CLANG_OPTIONS) set(MY_NVCC_OPTIONS) set_source_files_properties(${MY_SOURCE_FILES} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) -hip_add_executable(${MY_TARGET_NAME} ${MY_SOURCE_FILES} HIPCC_OPTIONS ${MY_HIPCC_OPTIONS} HCC_OPTIONS ${MY_HCC_OPTIONS} NVCC_OPTIONS ${MY_NVCC_OPTIONS}) +hip_add_executable(${MY_TARGET_NAME} ${MY_SOURCE_FILES} HIPCC_OPTIONS ${MY_HIPCC_OPTIONS} HCC_OPTIONS ${MY_HCC_OPTIONS} CLANG_OPTIONS ${MY_CLANG_OPTIONS} NVCC_OPTIONS ${MY_NVCC_OPTIONS}) diff --git a/samples/2_Cookbook/12_cmake_hip_add_executable/Readme.md b/samples/2_Cookbook/12_cmake_hip_add_executable/Readme.md index 937da30af0..4e322fd83e 100644 --- a/samples/2_Cookbook/12_cmake_hip_add_executable/Readme.md +++ b/samples/2_Cookbook/12_cmake_hip_add_executable/Readme.md @@ -28,8 +28,8 @@ If your project already modifies ```CMAKE_MODULE_PATH```, you will need to appen ## Using the hip_add_executable macro FindHIP provides the ```hip_add_executable``` macro that is similar to the ```cuda_add_executable``` macro that is provided by FindCUDA. The syntax is also similar. The ```hip_add_executable``` macro uses the hipcc wrapper as the compiler. -The macro supports specifying HCC-specific, NVCC-specific compiler options using the ```HCC_OPTIONS``` and ```NVCC_OPTIONS``` keywords. -Common options targeting both compilers can be specificed after the ```HIPCC_OPTIONS``` keyword. +The macro supports specifying HCC-specific, CLANG-specific, NVCC-specific compiler options using the ```HCC_OPTIONS```, ```CLANG_OPTIONS``` and ```NVCC_OPTIONS``` keywords. +Common options targeting both compilers can be specificed after the ```HIPCC_OPTIONS``` keyword. ## How to build and run: Use the following commands to build and execute the sample diff --git a/tests/README.md b/tests/README.md index 62e18c0787..a9401ed918 100644 --- a/tests/README.md +++ b/tests/README.md @@ -47,12 +47,13 @@ In the above, BUILD commands provide instructions on how to build the test case The supported syntax for the BUILD command is: ``` -BUILD: %t %s HIPCC_OPTIONS HCC_OPTIONS NVCC_OPTIONS EXCLUDE_HIP_PLATFORM EXCLUDE_HIP_RUNTIME EXCLUDE_HIP_COMPILER DEPENDS +BUILD: %t %s HIPCC_OPTIONS HCC_OPTIONS CLANG_OPTIONS NVCC_OPTIONS EXCLUDE_HIP_PLATFORM EXCLUDE_HIP_RUNTIME EXCLUDE_HIP_COMPILER DEPENDS ``` %s: refers to current source file name. Additional source files needed for the test can be specified by name (including relative path). %t: refers to target executable named derived by removing the extension from the current source file. Alternatively a target executable name can be specified. HIPCC_OPTIONS: All options specified after this delimiter are passed to hipcc on both HCC and NVCC platforms. -HCC_OPTIONS: All options specified after this delimiter are passed to hipcc on HCC platform only. +HCC_OPTIONS: All options specified after this delimiter are passed to hipcc on HCC compiler only. +CLANG_OPTIONS: All options specified after this delimiter are passed to hipcc on HIP-Clang compiler only. NVCC_OPTIONS: All options specified after this delimiter are passed to hipcc on NVCC platform only. EXCLUDE_HIP_PLATFORM: This can be used to exclude a test case from HCC, NVCC or both platforms. EXCLUDE_HIP_RUNTIME: This can be used to exclude a test case from HCC or VDI runtime. diff --git a/tests/hit/HIT.cmake b/tests/hit/HIT.cmake index 1c7f7ff464..be89721ce3 100644 --- a/tests/hit/HIT.cmake +++ b/tests/hit/HIT.cmake @@ -3,11 +3,12 @@ find_package(HIP REQUIRED) #------------------------------------------------------------------------------- # Helper macro to parse BUILD instructions -macro(PARSE_BUILD_COMMAND _target _sources _hipcc_options _hcc_options _nvcc_options _link_options _exclude_platforms _exclude_runtime _exclude_compiler _depends _dir) +macro(PARSE_BUILD_COMMAND _target _sources _hipcc_options _hcc_options _clang_options _nvcc_options _link_options _exclude_platforms _exclude_runtime _exclude_compiler _depends _dir) set(${_target}) set(${_sources}) set(${_hipcc_options}) set(${_hcc_options}) + set(${_clang_options}) set(${_nvcc_options}) set(${_link_options}) set(${_exclude_platforms}) @@ -17,6 +18,7 @@ macro(PARSE_BUILD_COMMAND _target _sources _hipcc_options _hcc_options _nvcc_opt set(_target_found FALSE) set(_hipcc_options_found FALSE) set(_hcc_options_found FALSE) + set(_clang_options_found FALSE) set(_nvcc_options_found FALSE) set(_link_options_found FALSE) set(_exclude_platforms_found FALSE) @@ -30,6 +32,7 @@ macro(PARSE_BUILD_COMMAND _target _sources _hipcc_options _hcc_options _nvcc_opt elseif("x${arg}" STREQUAL "xHIPCC_OPTIONS") set(_hipcc_options_found TRUE) set(_hcc_options_found FALSE) + set(_clang_options_found FALSE) set(_nvcc_options_found FALSE) set(_link_options_found FALSE) set(_exclude_platforms_found FALSE) @@ -39,6 +42,17 @@ macro(PARSE_BUILD_COMMAND _target _sources _hipcc_options _hcc_options _nvcc_opt elseif("x${arg}" STREQUAL "xHCC_OPTIONS") set(_hipcc_options_found FALSE) set(_hcc_options_found TRUE) + set(_clang_options_found FALSE) + set(_nvcc_options_found FALSE) + set(_link_options_found FALSE) + set(_exclude_platforms_found FALSE) + set(_exclude_runtime_found FALSE) + set(_exclude_compiler_found FALSE) + set(_depends_found FALSE) + elseif("x${arg}" STREQUAL "xCLANG_OPTIONS") + set(_hipcc_options_found FALSE) + set(_hcc_options_found FALSE) + set(_clang_options_found TRUE) set(_nvcc_options_found FALSE) set(_link_options_found FALSE) set(_exclude_platforms_found FALSE) @@ -48,6 +62,7 @@ macro(PARSE_BUILD_COMMAND _target _sources _hipcc_options _hcc_options _nvcc_opt elseif("x${arg}" STREQUAL "xNVCC_OPTIONS") set(_hipcc_options_found FALSE) set(_hcc_options_found FALSE) + set(_clang_options_found FALSE) set(_nvcc_options_found TRUE) set(_link_options_found FALSE) set(_exclude_platforms_found FALSE) @@ -57,6 +72,7 @@ macro(PARSE_BUILD_COMMAND _target _sources _hipcc_options _hcc_options _nvcc_opt elseif("x${arg}" STREQUAL "xLINK_OPTIONS") set(_hipcc_options_found FALSE) set(_hcc_options_found FALSE) + set(_clang_options_found FALSE) set(_nvcc_options_found FALSE) set(_link_options_found TRUE) set(_exclude_platforms_found FALSE) @@ -66,6 +82,7 @@ macro(PARSE_BUILD_COMMAND _target _sources _hipcc_options _hcc_options _nvcc_opt elseif("x${arg}" STREQUAL "xEXCLUDE_HIP_PLATFORM") set(_hipcc_options_found FALSE) set(_hcc_options_found FALSE) + set(_clang_options_found FALSE) set(_nvcc_options_found FALSE) set(_link_options_found FALSE) set(_exclude_platforms_found TRUE) @@ -75,6 +92,7 @@ macro(PARSE_BUILD_COMMAND _target _sources _hipcc_options _hcc_options _nvcc_opt elseif("x${arg}" STREQUAL "xEXCLUDE_HIP_RUNTIME") set(_hipcc_options_found FALSE) set(_hcc_options_found FALSE) + set(_clang_options_found FALSE) set(_nvcc_options_found FALSE) set(_link_options_found FALSE) set(_exclude_platforms_found FALSE) @@ -84,6 +102,7 @@ macro(PARSE_BUILD_COMMAND _target _sources _hipcc_options _hcc_options _nvcc_opt elseif("x${arg}" STREQUAL "xEXCLUDE_HIP_COMPILER") set(_hipcc_options_found FALSE) set(_hcc_options_found FALSE) + set(_clang_options_found FALSE) set(_nvcc_options_found FALSE) set(_link_options_found FALSE) set(_exclude_platforms_found FALSE) @@ -93,6 +112,7 @@ macro(PARSE_BUILD_COMMAND _target _sources _hipcc_options _hcc_options _nvcc_opt elseif("x${arg}" STREQUAL "xDEPENDS") set(_hipcc_options_found FALSE) set(_hcc_options_found FALSE) + set(_clang_options_found FALSE) set(_nvcc_options_found FALSE) set(_link_options_found FALSE) set(_exclude_platforms_found FALSE) @@ -104,6 +124,8 @@ macro(PARSE_BUILD_COMMAND _target _sources _hipcc_options _hcc_options _nvcc_opt list(APPEND ${_hipcc_options} ${arg}) elseif(_hcc_options_found) list(APPEND ${_hcc_options} ${arg}) + elseif(_clang_options_found) + list(APPEND ${_clang_options} ${arg}) elseif(_nvcc_options_found) list(APPEND ${_nvcc_options} ${arg}) elseif(_link_options_found) @@ -295,7 +317,7 @@ macro(HIT_ADD_FILES _dir _label _parent) string(REGEX REPLACE "\n" ";" _contents "${_contents}") foreach(_cmd ${_contents}) string(REGEX REPLACE " " ";" _cmd "${_cmd}") - parse_build_command(_target _sources _hipcc_options _hcc_options _nvcc_options _link_options _exclude_platforms _exclude_runtime _exclude_compiler _depends ${_dir} ${_cmd}) + parse_build_command(_target _sources _hipcc_options _hcc_options _clang_options _nvcc_options _link_options _exclude_platforms _exclude_runtime _exclude_compiler _depends ${_dir} ${_cmd}) string(REGEX REPLACE "/" "." target ${_label}/${_target}) if("all" IN_LIST _exclude_platforms OR ${HIP_PLATFORM} IN_LIST _exclude_platforms) insert_into_map("_exclude" "${target}" TRUE) @@ -308,7 +330,7 @@ macro(HIT_ADD_FILES _dir _label _parent) else() set_source_files_properties(${_sources} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) hip_reset_flags() - hip_add_executable(${target} ${_sources} HIPCC_OPTIONS ${_hipcc_options} HCC_OPTIONS ${_hcc_options} NVCC_OPTIONS ${_nvcc_options} EXCLUDE_FROM_ALL) + hip_add_executable(${target} ${_sources} HIPCC_OPTIONS ${_hipcc_options} HCC_OPTIONS ${_hcc_options} CLANG_OPTIONS ${_clang_options} NVCC_OPTIONS ${_nvcc_options} EXCLUDE_FROM_ALL) target_link_libraries(${target} PRIVATE ${_link_options}) set_target_properties(${target} PROPERTIES OUTPUT_NAME ${_target} RUNTIME_OUTPUT_DIRECTORY ${_label} LINK_DEPENDS "${HIP_LIB_FILES}") add_dependencies(${_parent} ${target}) diff --git a/tests/src/deviceLib/hipMathFunctions.cpp b/tests/src/deviceLib/hipMathFunctions.cpp index 4d313167e8..c99d8a91f2 100644 --- a/tests/src/deviceLib/hipMathFunctions.cpp +++ b/tests/src/deviceLib/hipMathFunctions.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../test_common.cpp HCC_OPTIONS -Xclang -fallow-half-arguments-and-returns EXCLUDE_HIP_PLATFORM nvcc + * BUILD: %t %s ../test_common.cpp HCC_OPTIONS -Xclang -fallow-half-arguments-and-returns CLANG_OPTIONS -Xclang -fallow-half-arguments-and-returns EXCLUDE_HIP_PLATFORM nvcc * TEST: %t * HIT_END */ From d890d77da4e08bbfabb06c955c0275b7fde73e71 Mon Sep 17 00:00:00 2001 From: Alex Xie Date: Thu, 30 Apr 2020 22:19:15 -0400 Subject: [PATCH 10/33] SWDEV-221166 - Detect support for large bar access through HIP runtime API Change-Id: Iaa9756c1b5e40c1ab5afb38e44a6699fa5f6c13f --- include/hip/hip_runtime_api.h | 2 +- samples/1_Utils/hipInfo/hipInfo.cpp | 3 ++- src/hip_hcc.cpp | 2 +- src/hip_hcc_internal.h | 3 --- vdi/hip_device.cpp | 1 + 5 files changed, 5 insertions(+), 6 deletions(-) diff --git a/include/hip/hip_runtime_api.h b/include/hip/hip_runtime_api.h index b0974aeef6..4dbaaf8d18 100644 --- a/include/hip/hip_runtime_api.h +++ b/include/hip/hip_runtime_api.h @@ -136,7 +136,7 @@ typedef struct hipDeviceProp_t { ///devices with unmatched block dimensions int cooperativeMultiDeviceUnmatchedSharedMem; ///< HIP device supports cooperative launch on multiple ///devices with unmatched shared memories - + int isLargeBar; ///< 1: if it is a large PCI bar device, else 0 } hipDeviceProp_t; diff --git a/samples/1_Utils/hipInfo/hipInfo.cpp b/samples/1_Utils/hipInfo/hipInfo.cpp index 14faa7671b..31a5430486 100644 --- a/samples/1_Utils/hipInfo/hipInfo.cpp +++ b/samples/1_Utils/hipInfo/hipInfo.cpp @@ -146,7 +146,8 @@ void printDeviceProp(int deviceId) { cout << setw(w1) << "maxTexture3D.width: " << props.maxTexture3D[0] << endl; cout << setw(w1) << "maxTexture3D.height: " << props.maxTexture3D[1] << endl; cout << setw(w1) << "maxTexture3D.depth: " << props.maxTexture3D[2] << endl; - + cout << setw(w1) << "isLargeBar: " << props.isLargeBar << endl; + int deviceCnt; hipGetDeviceCount(&deviceCnt); cout << setw(w1) << "peers: "; diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index 5159254d57..ced16739d9 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -864,7 +864,7 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop) { /* Computemode for HSA Devices is always : cudaComputeModeDefault */ prop->computeMode = 0; - _isLargeBar = _acc.has_cpu_accessible_am(); + prop->isLargeBar = _acc.has_cpu_accessible_am() ? 1 : 0; // Get Max Threads Per Multiprocessor uint32_t max_waves_per_cu; diff --git a/src/hip_hcc_internal.h b/src/hip_hcc_internal.h index 803abe28e2..cf3b7f6d45 100644 --- a/src/hip_hcc_internal.h +++ b/src/hip_hcc_internal.h @@ -836,9 +836,6 @@ class ihipDevice_t { unsigned _computeUnits; hipDeviceProp_t _props; // saved device properties. - // TODO - report this through device properties, base on HCC API call. - int _isLargeBar; - // Node id reported by kfd for this device uint32_t _driver_node_id; diff --git a/vdi/hip_device.cpp b/vdi/hip_device.cpp index 5dfc595ee9..65c09398fe 100644 --- a/vdi/hip_device.cpp +++ b/vdi/hip_device.cpp @@ -230,6 +230,7 @@ hipError_t hipGetDeviceProperties ( hipDeviceProp_t* props, hipDevice_t device ) deviceProps.texturePitchAlignment = info.imagePitchAlignment_; deviceProps.kernelExecTimeoutEnabled = 0; deviceProps.ECCEnabled = info.errorCorrectionSupport_? 1:0; + deviceProps.isLargeBar = info.largeBar_ ? 1 : 0; *props = deviceProps; HIP_RETURN(hipSuccess); From 1ed3af27896364edcb3f6faa3618ca6420da7137 Mon Sep 17 00:00:00 2001 From: kjayapra-amd Date: Fri, 1 May 2020 17:53:47 -0400 Subject: [PATCH 11/33] SWDEV-231874 - Make hipMemset patten size 8byte aligned when possible Change-Id: Ida98bd89212af9b00f3c9c7c5d22ae81f3b5396a --- vdi/hip_memory.cpp | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/vdi/hip_memory.cpp b/vdi/hip_memory.cpp index d4be73496d..54eacff530 100755 --- a/vdi/hip_memory.cpp +++ b/vdi/hip_memory.cpp @@ -1654,7 +1654,7 @@ hipError_t hipDrvMemcpy3DAsync(const HIP_MEMCPY3D* pCopy, hipStream_t stream) { HIP_RETURN(ihipMemcpyParam3D(pCopy, stream, true)); } -hipError_t packFillMemoryCommand(amd::Memory* memory, size_t offset, int value, size_t valueSize, +hipError_t packFillMemoryCommand(amd::Memory* memory, size_t offset, int64_t value, size_t valueSize, size_t sizeBytes, amd::HostQueue* queue, bool isAsync = false) { if ((memory == nullptr) || (queue == nullptr)) { @@ -1680,7 +1680,7 @@ hipError_t packFillMemoryCommand(amd::Memory* memory, size_t offset, int value, return hipSuccess; } -hipError_t ihipMemset(void* dst, int value, size_t valueSize, size_t sizeBytes, +hipError_t ihipMemset(void* dst, int64_t value, size_t valueSize, size_t sizeBytes, hipStream_t stream, bool isAsync = false) { if (sizeBytes == 0) { // Skip if nothing needs filling. @@ -1702,38 +1702,42 @@ hipError_t ihipMemset(void* dst, int value, size_t valueSize, size_t sizeBytes, hipError_t hip_error = hipSuccess; amd::HostQueue* queue = hip::getQueue(stream); - int32_t value32 = 0; - const size_t dwordModSize = (sizeBytes % sizeof(int32_t)); + int64_t value64 = 0; + const size_t uint64ModSize = (sizeBytes % sizeof(int64_t)); - if (sizeBytes/sizeof(int32_t) > 0) { + if (sizeBytes/sizeof(int64_t) > 0) { if (valueSize == sizeof(int8_t)) { value = value & 0xff; - value32 = ((value << 24) | (value << 16) | (value << 8) | (value)); + value64 = ((value << 56) | (value << 48) | (value << 40) | (value << 32) + | (value << 24) | (value << 16) | (value << 8) | (value)); } else if (valueSize == sizeof(int16_t)) { value = value & 0xffff; - value32 = ((value<<16) | (value)); + value64 = ((value << 48) | (value << 32) | (value<<16) | (value)); } else if(valueSize == sizeof(int32_t)) { - value32 = value; + value = value & 0xffffffff; + value64 = ((value<<32) | (value)); + } else if (valueSize == sizeof(int64_t)) { + value64 = value; } else { LogPrintfError("Unsupported Pattern size: %u \n", valueSize); return hipErrorInvalidValue; } - // If dwordModSize is != 0 then we will do a second fillBuffer Command + // If uint64ModSize is != 0 then we will do a second fillBuffer Command // on the same stream below, dont wait, do the first call async. - hip_error = packFillMemoryCommand(memory, offset, value32, sizeof(int32_t), - sizeBytes - dwordModSize, queue, - ((dwordModSize != 0) || isAsync)); + hip_error = packFillMemoryCommand(memory, offset, value64, sizeof(int64_t), + sizeBytes - uint64ModSize, queue, + ((uint64ModSize != 0) || isAsync)); if(hip_error != hipSuccess) { return hip_error; } } - if (dwordModSize != 0) { + if (uint64ModSize != 0) { void* new_dst = reinterpret_cast((reinterpret_cast
(dst) - + sizeBytes) - dwordModSize); + + sizeBytes) - uint64ModSize); memory = getMemoryObject(new_dst, offset); hip_error = packFillMemoryCommand(memory, offset, value, valueSize, - dwordModSize, queue, isAsync); + uint64ModSize, queue, isAsync); } return hip_error; From dcd466773b660dd2a9c85b766a9a25cdf2a4147d Mon Sep 17 00:00:00 2001 From: Vlad Sytchenko Date: Thu, 30 Apr 2020 16:38:56 -0400 Subject: [PATCH 12/33] Fix invalid check in hipTexRefGetAddressMode() This resolves https://github.com/ROCm-Developer-Tools/HIP/issues/2048. Change-Id: Iba3653b011434a450a5c3ce10cfd4aa39d318527 --- vdi/hip_texture.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vdi/hip_texture.cpp b/vdi/hip_texture.cpp index acaaf8c165..de62adb402 100755 --- a/vdi/hip_texture.cpp +++ b/vdi/hip_texture.cpp @@ -728,7 +728,7 @@ hipError_t hipTexRefGetAddressMode(hipTextureAddressMode* pam, } // Currently, the only valid value for dim are 0 and 1. - if ((dim != 0) || (dim != 1)) { + if ((dim != 0) && (dim != 1)) { DevLogPrintfError("Currently only 2 dimensions (0,1) are valid," "dim : %d \n", dim); HIP_RETURN(hipErrorInvalidValue); From 6bad7ef7900e4200bc61fcf4606ef6f7a6e3933a Mon Sep 17 00:00:00 2001 From: kjayapra-amd Date: Sun, 3 May 2020 23:08:12 -0400 Subject: [PATCH 13/33] SWDEV-216213 - Delete kernel function_ in hip::Function destructor Change-Id: I0f6c44927f453ac63b5b84552a5fba07f04a8a8e --- vdi/hip_internal.hpp | 1 + 1 file changed, 1 insertion(+) mode change 100644 => 100755 vdi/hip_internal.hpp diff --git a/vdi/hip_internal.hpp b/vdi/hip_internal.hpp old mode 100644 new mode 100755 index 3d7c5249a8..4f85e7b7d2 --- a/vdi/hip_internal.hpp +++ b/vdi/hip_internal.hpp @@ -177,6 +177,7 @@ namespace hip { amd::Monitor lock_; Function(amd::Kernel* f) : function_(f), lock_("function lock") {} + ~Function() { function_->release(); } hipFunction_t asHipFunction() { return reinterpret_cast(this); } static Function* asFunction(hipFunction_t f) { return reinterpret_cast(f); } From 1012459a6507cecdf5f6943519db2599bd6d96ac Mon Sep 17 00:00:00 2001 From: kjayapra-amd Date: Sun, 3 May 2020 23:05:59 -0400 Subject: [PATCH 14/33] SWDEV-232464 - Memory Map modules loaded via file from hipModuleLoad Change-Id: Ibef8c28b764b8551529212d99a777ae5fe2dffda --- vdi/hip_module.cpp | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/vdi/hip_module.cpp b/vdi/hip_module.cpp index 5d09f88293..ec8ae1c440 100755 --- a/vdi/hip_module.cpp +++ b/vdi/hip_module.cpp @@ -27,7 +27,7 @@ #include "hip_event.hpp" #include "hip_platform.hpp" -hipError_t ihipModuleLoadData(hipModule_t *module, const void *image); +hipError_t ihipModuleLoadData(hipModule_t* module, const void* mmap_ptr, size_t mmap_size); const std::string& FunctionName(const hipFunction_t f) { @@ -59,19 +59,18 @@ hipError_t hipModuleLoad(hipModule_t* module, const char* fname) { HIP_INIT_API(hipModuleLoad, module, fname); + const void* mmap_ptr = nullptr; + size_t mmap_size = 0; + if (!fname) { HIP_RETURN(hipErrorInvalidValue); } - std::ifstream file(fname, std::ios::binary); - - if (!file.is_open()) { + if (!amd::Os::MemoryMapFile(fname, &mmap_ptr, &mmap_size)) { HIP_RETURN(hipErrorFileNotFound); } - std::vector tmp{std::istreambuf_iterator{file}, std::istreambuf_iterator{}}; - - HIP_RETURN(ihipModuleLoadData(module, tmp.data())); + HIP_RETURN(ihipModuleLoadData(module, mmap_ptr, mmap_size)); } bool ihipModuleUnregisterGlobal(hipModule_t hmod) { @@ -112,7 +111,7 @@ hipError_t hipModuleLoadData(hipModule_t *module, const void *image) { HIP_INIT_API(hipModuleLoadData, module, image); - HIP_RETURN(ihipModuleLoadData(module, image)); + HIP_RETURN(ihipModuleLoadData(module, image, 0)); } hipError_t hipModuleLoadDataEx(hipModule_t *module, const void *image, @@ -122,7 +121,7 @@ hipError_t hipModuleLoadDataEx(hipModule_t *module, const void *image, /* TODO: Pass options to Program */ HIP_INIT_API(hipModuleLoadData, module, image); - HIP_RETURN(ihipModuleLoadData(module, image)); + HIP_RETURN(ihipModuleLoadData(module, image, 0)); } extern hipError_t __hipExtractCodeObjectFromFatBinary(const void* data, @@ -216,24 +215,28 @@ inline bool ihipModuleRegisterGlobal(amd::Program* program, hipModule_t* module) return true; } -hipError_t ihipModuleLoadData(hipModule_t *module, const void *image) +hipError_t ihipModuleLoadData(hipModule_t* module, const void* mmap_ptr, size_t mmap_size) { + const void* image = nullptr; std::vector> code_objs; - hipError_t code_obj_err = __hipExtractCodeObjectFromFatBinary(image, {hip::getCurrentDevice()->devices()[0]->info().name_}, code_objs); + hipError_t code_obj_err = __hipExtractCodeObjectFromFatBinary(mmap_ptr, + {hip::getCurrentDevice()->devices()[0]->info().name_}, code_objs); if (code_obj_err == hipSuccess) { image = code_objs[0].first; } else if(code_obj_err == hipErrorNoBinaryForGpu) { return code_obj_err; } - amd::Program* program = new amd::Program(*hip::getCurrentDevice()->asContext()); + amd::Program* program = new amd::Program(*hip::getCurrentDevice()->asContext(), + amd::Program::Language::Binary, mmap_ptr, mmap_size); if (program == NULL) { return hipErrorOutOfMemory; } program->setVarInfoCallBack(&getSvarInfo); - if (CL_SUCCESS != program->addDeviceProgram(*hip::getCurrentDevice()->devices()[0], image, ElfSize(image))) { + if (CL_SUCCESS != program->addDeviceProgram(*hip::getCurrentDevice()->devices()[0], image, + ElfSize(image), false)) { return hipErrorInvalidKernelFile; } From bfad8d2833d4ba11c71ac3431dafbd7400b726ef Mon Sep 17 00:00:00 2001 From: Vlad Sytchenko Date: Mon, 4 May 2020 15:26:56 -0400 Subject: [PATCH 15/33] Fix even more typos from 5429b40afeac311a48df31adb01b2dc37d3b11fd Change-Id: I4f44261547b321a214348943ff5117eb5bd55b06 --- include/hip/hcc_detail/hip_runtime_api.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/hip/hcc_detail/hip_runtime_api.h b/include/hip/hcc_detail/hip_runtime_api.h index 0779c64db7..98afa369fa 100644 --- a/include/hip/hcc_detail/hip_runtime_api.h +++ b/include/hip/hcc_detail/hip_runtime_api.h @@ -3898,7 +3898,7 @@ static inline hipError_t hipBindTexture( const void *devPtr, size_t size = UINT_MAX) { - return hipBindTexture(offset, &tex, devPtr, tex.channelDesc, size); + return hipBindTexture(offset, &tex, devPtr, &tex.channelDesc, size); } template From 43c3c8946784a6e4831eca06189643bbb6d15085 Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Mon, 4 May 2020 20:02:39 +0000 Subject: [PATCH 16/33] Add __HIP_VDI__ to hipconfig host flags for VDI Change-Id: Ia9582f2d51b50cbe151b4866ffbfc9514825f613 --- bin/hipconfig | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/bin/hipconfig b/bin/hipconfig index 2dcc81fa76..b3414213f2 100755 --- a/bin/hipconfig +++ b/bin/hipconfig @@ -109,14 +109,17 @@ if (not defined $HIP_PLATFORM) { } if ($HIP_COMPILER eq "hcc") { - $CPP_CONFIG= " -D__HIP_PLATFORM_HCC__= -I$HIP_PATH/include -I$HCC_HOME/include -I$HSA_PATH/include"; + $CPP_CONFIG = " -D__HIP_PLATFORM_HCC__= -I$HIP_PATH/include -I$HCC_HOME/include -I$HSA_PATH/include"; } if ($HIP_COMPILER eq "clang") { $HIP_CLANG_VERSION = `$HIP_CLANG_PATH/clang++ --version`; $HIP_CLANG_VERSION=~/.*clang version ([^ ]+).*/; $HIP_CLANG_VERSION=$1; - $CPP_CONFIG= " -D__HIP_PLATFORM_HCC__= -I$HIP_PATH/include -I$HIP_CLANG_PATH/../lib/clang/$HIP_CLANG_VERSION -I$HSA_PATH/include"; + $CPP_CONFIG = " -D__HIP_PLATFORM_HCC__= -I$HIP_PATH/include -I$HIP_CLANG_PATH/../lib/clang/$HIP_CLANG_VERSION -I$HSA_PATH/include"; +} +if ($HIP_RUNTIME eq "VDI") { + $CPP_CONFIG .= " -D__HIP_VDI__"; } if ($HIP_PLATFORM eq "nvcc") { $CPP_CONFIG = " -D__HIP_PLATFORM_NVCC__= -I$HIP_PATH/include -I$CUDA_PATH/include"; From fd8ec70847f064362a09cbb34ca920df7d2385cf Mon Sep 17 00:00:00 2001 From: German Andryeyev Date: Fri, 1 May 2020 09:22:31 -0400 Subject: [PATCH 17/33] Avoid null stream allocation in hipFree - Add option to skip stream allocation on access. - Avoid null stream allocation in ihipFree, so an inactive device won't be initialized Change-Id: Id24426640df59a5e7a08b2dd9dcd4d67758b84bf --- vdi/hip_device.cpp | 7 ++++--- vdi/hip_internal.hpp | 10 +++++++--- vdi/hip_memory.cpp | 27 +++++++++++++++++++++++---- vdi/hip_stream.cpp | 6 +++--- 4 files changed, 37 insertions(+), 13 deletions(-) diff --git a/vdi/hip_device.cpp b/vdi/hip_device.cpp index 65c09398fe..3476ac14fc 100644 --- a/vdi/hip_device.cpp +++ b/vdi/hip_device.cpp @@ -24,8 +24,9 @@ namespace hip { -amd::HostQueue* Device::NullStream() { - amd::HostQueue* null_queue = null_stream_.asHostQueue(); +// ================================================================================================ +amd::HostQueue* Device::NullStream(bool skip_alloc) { + amd::HostQueue* null_queue = null_stream_.asHostQueue(skip_alloc); if (null_queue == nullptr) { return nullptr; } @@ -34,7 +35,7 @@ amd::HostQueue* Device::NullStream() { return null_queue; } -}; +} hipError_t hipDeviceGet(hipDevice_t *device, int deviceId) { HIP_INIT_API(hipDeviceGet, device, deviceId); diff --git a/vdi/hip_internal.hpp b/vdi/hip_internal.hpp index 4f85e7b7d2..eda87cea7b 100755 --- a/vdi/hip_internal.hpp +++ b/vdi/hip_internal.hpp @@ -90,8 +90,13 @@ namespace hip { public: Stream(Device* dev, amd::CommandQueue::Priority p, unsigned int f = 0, bool null_stream = false); + + /// Creates the hip stream object, including AMD host queue bool Create(); - amd::HostQueue* asHostQueue(); + + /// Get device AMD host queue object. The method can allocate the queue + amd::HostQueue* asHostQueue(bool skip_alloc = false); + void Destroy(); void Finish() const; /// Get device ID associated with the current stream; @@ -147,7 +152,7 @@ namespace hip { return hipErrorPeerAccessNotEnabled; } } - amd::HostQueue* NullStream(); + amd::HostQueue* NullStream(bool skip_alloc = false); }; extern std::once_flag g_ihipInitialized; @@ -182,7 +187,6 @@ namespace hip { static Function* asFunction(hipFunction_t f) { return reinterpret_cast(f); } }; - }; struct ihipExec_t { diff --git a/vdi/hip_memory.cpp b/vdi/hip_memory.cpp index 54eacff530..593513c98d 100755 --- a/vdi/hip_memory.cpp +++ b/vdi/hip_memory.cpp @@ -25,6 +25,7 @@ #include "platform/command.hpp" #include "platform/memory.hpp" +// ================================================================================================ amd::Memory* getMemoryObject(const void* ptr, size_t& offset) { amd::Memory *memObj = amd::MemObjMap::FindMemObj(ptr); if (memObj != nullptr) { @@ -41,21 +42,39 @@ amd::Memory* getMemoryObject(const void* ptr, size_t& offset) { return memObj; } +// ================================================================================================ hipError_t ihipFree(void *ptr) { if (ptr == nullptr) { return hipSuccess; } - if (amd::SvmBuffer::malloced(ptr)) { - for (auto& dev : g_devices) { - dev->NullStream()->finish(); + + size_t offset = 0; + amd::Memory* memory_object = getMemoryObject(ptr, offset); + + if (memory_object != nullptr) { + // Check if it's an allocation in system memory and can be shared across all devices + if (memory_object->getMemFlags() & CL_MEM_SVM_FINE_GRAIN_BUFFER) { + for (auto& dev : g_devices) { + // Skip stream allocation, since if it wasn't allocated until free, then the device + // wasn't used + constexpr bool SkipStreamAlloc = true; + amd::HostQueue* queue = dev->NullStream(SkipStreamAlloc); + if (queue != nullptr) { + queue->finish(); + } + } + } else { + // Wait on the device, associated with the current memory object + hip::getNullStream(memory_object->getContext())->finish(); } - amd::SvmBuffer::free(*hip::getCurrentDevice()->asContext(), ptr); + amd::SvmBuffer::free(memory_object->getContext(), ptr); return hipSuccess; } return hipErrorInvalidValue; } +// ================================================================================================ hipError_t ihipMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { if (sizeBytes == 0) { diff --git a/vdi/hip_stream.cpp b/vdi/hip_stream.cpp index fbcd223ed2..e4bf4fe192 100644 --- a/vdi/hip_stream.cpp +++ b/vdi/hip_stream.cpp @@ -68,13 +68,13 @@ bool Stream::Create() { } // ================================================================================================ -amd::HostQueue* Stream::asHostQueue() { +amd::HostQueue* Stream::asHostQueue(bool skip_alloc) { // Access to the stream object is lock protected, because possible allocation amd::ScopedLock l(Lock()); if (queue_ == nullptr) { // Create the host queue for the first time - if (!Create()) { - return nullptr; + if (!skip_alloc) { + Create(); } } return queue_; From ea7b96194f5bedbf432ad0841a04aee49f9c5e2b Mon Sep 17 00:00:00 2001 From: kjayapra-amd Date: Mon, 4 May 2020 18:59:49 -0400 Subject: [PATCH 18/33] SWDEV-234029 - Remove module from module_map_ @ hipModuleUnload Change-Id: Ic01fdb88c658c68298bd51cab2da21dc3ff0e1f7 --- vdi/hip_platform.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vdi/hip_platform.cpp b/vdi/hip_platform.cpp index f7cc1754f2..11bd373550 100755 --- a/vdi/hip_platform.cpp +++ b/vdi/hip_platform.cpp @@ -209,6 +209,7 @@ bool PlatformState::unregisterFunc(hipModule_t hmod) { } delete mod_ptr; } + module_map_.erase(mod_it); } return true; } @@ -360,6 +361,7 @@ bool PlatformState::findModFunc(hipFunction_t* hfunc, hipModule_t hmod, const ch auto mod_it = module_map_.find(hmod); if (mod_it != module_map_.cend()) { + assert(mod_it->second != nullptr); auto func_it = mod_it->second->functions_.find(name); if (func_it != mod_it->second->functions_.cend()) { PlatformState::DeviceFunction& devFunc = func_it->second; From 60c34fbd4d959b6e97432060b929dc7b91ebf605 Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Sat, 2 May 2020 01:21:18 +0000 Subject: [PATCH 19/33] Make HIP C compliant Change-Id: Ic2fa650675e68200c841ce3db622da836b169f33 --- include/hip/hcc_detail/hip_runtime_api.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/include/hip/hcc_detail/hip_runtime_api.h b/include/hip/hcc_detail/hip_runtime_api.h index 98afa369fa..47b84bd022 100644 --- a/include/hip/hcc_detail/hip_runtime_api.h +++ b/include/hip/hcc_detail/hip_runtime_api.h @@ -3351,7 +3351,7 @@ hipError_t hipBindTexture( const textureReference* tex, const void* devPtr, const hipChannelFormatDesc* desc, - size_t size = UINT_MAX); + size_t size __dparm(UINT_MAX)); hipError_t hipBindTexture2D( size_t* offset, @@ -3386,7 +3386,7 @@ hipError_t hipCreateTextureObject( hipTextureObject_t* pTexObject, const hipResourceDesc* pResDesc, const hipTextureDesc* pTexDesc, - const hipResourceViewDesc* pResViewDesc); + const struct hipResourceViewDesc* pResViewDesc); hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject); @@ -3399,7 +3399,7 @@ hipError_t hipGetTextureObjectResourceDesc( hipTextureObject_t textureObject); hipError_t hipGetTextureObjectResourceViewDesc( - hipResourceViewDesc* pResViewDesc, + struct hipResourceViewDesc* pResViewDesc, hipTextureObject_t textureObject); hipError_t hipGetTextureObjectTextureDesc( @@ -3411,12 +3411,12 @@ hipError_t hipTexRefGetAddress( const textureReference* texRef); hipError_t hipTexRefGetAddressMode( - hipTextureAddressMode* pam, + enum hipTextureAddressMode* pam, const textureReference* texRef, int dim); hipError_t hipTexRefGetFilterMode( - hipTextureFilterMode* pfm, + enum hipTextureFilterMode* pfm, const textureReference* texRef); hipError_t hipTexRefGetFlags( @@ -3433,7 +3433,7 @@ hipError_t hipTexRefGetMaxAnisotropy( const textureReference* texRef); hipError_t hipTexRefGetMipmapFilterMode( - hipTextureFilterMode* pfm, + enum hipTextureFilterMode* pfm, const textureReference* texRef); hipError_t hipTexRefGetMipmapLevelBias( @@ -3464,7 +3464,7 @@ hipError_t hipTexRefSetAddress2D( hipError_t hipTexRefSetAddressMode( textureReference* texRef, int dim, - hipTextureAddressMode am); + enum hipTextureAddressMode am); hipError_t hipTexRefSetArray( textureReference* tex, @@ -3477,7 +3477,7 @@ hipError_t hipTexRefSetBorderColor( hipError_t hipTexRefSetFilterMode( textureReference* texRef, - hipTextureFilterMode fm); + enum hipTextureFilterMode fm); hipError_t hipTexRefSetFlags( textureReference* texRef, @@ -3494,7 +3494,7 @@ hipError_t hipTexRefSetMaxAnisotropy( hipError_t hipTexRefSetMipmapFilterMode( textureReference* texRef, - hipTextureFilterMode fm); + enum hipTextureFilterMode fm); hipError_t hipTexRefSetMipmapLevelBias( textureReference* texRef, @@ -3507,7 +3507,7 @@ hipError_t hipTexRefSetMipmapLevelClamp( hipError_t hipTexRefSetMipmappedArray( textureReference* texRef, - hipMipmappedArray* mipmappedArray, + struct hipMipmappedArray* mipmappedArray, unsigned int Flags); hipError_t hipMipmappedArrayCreate( From 09bcd2e378b1b3df25cf0692d8013043e4ef0bc3 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Tue, 5 May 2020 14:30:25 -0400 Subject: [PATCH 20/33] Fix hipcc for -E -E is for preprocessing only, which should be compile only. This is required by enabling sccache. Change-Id: Ia0a0acb6a04abd03a9cb5b3c13cf7446837f37b4 --- bin/hipcc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/hipcc b/bin/hipcc index f86a5f7fa3..87ac826b3c 100755 --- a/bin/hipcc +++ b/bin/hipcc @@ -422,7 +422,7 @@ foreach $arg (@ARGV) $trimarg = $arg; $trimarg =~ s/^\s+|\s+$//g; # Remive whitespace my $swallowArg = 0; - if ($arg eq '-c' or $arg eq '--genco') { + if ($arg eq '-c' or $arg eq '--genco' or $arg eq '-E') { $compileOnly = 1; $needLDFLAGS = 0; } From 8f53157057256ea7b915b90091a68bfcceea7470 Mon Sep 17 00:00:00 2001 From: kjayapra-amd Date: Tue, 5 May 2020 09:54:36 -0400 Subject: [PATCH 21/33] SWDEV-209747 - Add compiletime variable DEV_LOG_ENABLE Change-Id: Ie8ff8abafa6b5e0f83209da42778688a047562c3 --- CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) mode change 100644 => 100755 CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt old mode 100644 new mode 100755 index d208c19fb5..8048f712e1 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -204,6 +204,10 @@ if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) endif() endif() +if(DEV_LOG_ENABLE MATCHES "yes") + add_definitions(-DDEV_LOG_ENABLE) +endif() + # Set default install path as "/opt/rocm/hip", can override the path from cmake build. set(CPACK_INSTALL_PREFIX ${HIP_DEFAULT_INSTALL_PREFIX} CACHE PATH "Package Installation path for HIP") From 18b19e032429efdf8da3012def8748b802e54013 Mon Sep 17 00:00:00 2001 From: kjayapra-amd Date: Tue, 5 May 2020 14:57:57 -0400 Subject: [PATCH 22/33] SWDEV-229840 - Fixing compilation error with right var name. Change-Id: I9f5278638271c384874004eb17085d29ca14ec3a --- vdi/hip_rtc.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vdi/hip_rtc.cpp b/vdi/hip_rtc.cpp index 8c82337405..804e903893 100755 --- a/vdi/hip_rtc.cpp +++ b/vdi/hip_rtc.cpp @@ -120,7 +120,7 @@ char* demangle(const char* loweredName) { { free(demangledName); DevLogPrintfError("Cannot undecorate loweredName: %s demangledName: %s \n", - loweredName, demangedName); + loweredName, demangledName); return nullptr; } #else From c5f76c3de3da294f64a126d59528ef746a905366 Mon Sep 17 00:00:00 2001 From: Payam Date: Wed, 29 Apr 2020 23:28:46 -0400 Subject: [PATCH 23/33] name change vdi to rocclr Change-Id: I06d198bbb4a499e153b290b73a92afed3553b252 --- CMakeLists.txt | 36 +++++------ bin/hipcc | 42 ++++++------ bin/hipconfig | 2 +- hip-config.cmake.in | 6 +- include/hip/hcc_detail/channel_descriptor.h | 4 +- include/hip/hcc_detail/device_functions.h | 4 +- include/hip/hcc_detail/hip_runtime_api.h | 64 +++++++++---------- include/hip/hcc_detail/hip_runtime_prof.h | 4 +- include/hip/hip_ext.h | 4 +- {vdi => rocclr}/CMakeLists.txt | 44 +++++++------ {vdi => rocclr}/cl_gl.cpp | 0 {vdi => rocclr}/cl_gl_amd.hpp | 0 {vdi => rocclr}/cl_lqdflash_amd.cpp | 0 {vdi => rocclr}/cl_lqdflash_amd.h | 0 {vdi => rocclr}/fixme.cpp | 0 {vdi => rocclr}/hip_activity.cpp | 0 {vdi => rocclr}/hip_context.cpp | 0 {vdi => rocclr}/hip_conversions.hpp | 0 {vdi => rocclr}/hip_device.cpp | 0 {vdi => rocclr}/hip_device_runtime.cpp | 0 {vdi => rocclr}/hip_error.cpp | 0 {vdi => rocclr}/hip_event.cpp | 0 {vdi => rocclr}/hip_event.hpp | 0 {vdi => rocclr}/hip_formatting.hpp | 0 {vdi => rocclr}/hip_hcc.def.in | 0 {vdi => rocclr}/hip_hcc.map.in | 0 {vdi => rocclr}/hip_hcc.rc | 0 {vdi => rocclr}/hip_intercept.cpp | 0 {vdi => rocclr}/hip_internal.hpp | 0 {vdi => rocclr}/hip_memory.cpp | 0 {vdi => rocclr}/hip_module.cpp | 2 +- {vdi => rocclr}/hip_peer.cpp | 0 {vdi => rocclr}/hip_platform.cpp | 0 {vdi => rocclr}/hip_platform.hpp | 0 {vdi => rocclr}/hip_prof_api.h | 0 {vdi => rocclr}/hip_prof_gen.py | 0 {vdi => rocclr}/hip_profile.cpp | 0 {vdi => rocclr}/hip_rtc.cpp | 0 {vdi => rocclr}/hip_stream.cpp | 0 {vdi => rocclr}/hip_surface.cpp | 0 {vdi => rocclr}/hip_texture.cpp | 4 +- {vdi => rocclr}/hiprtc_internal.hpp | 0 {vdi => rocclr}/trace_helper.h | 0 src/hip_clang.cpp | 4 +- tests/README.md | 16 ++--- tests/src/Negative/memory/hipMemory.cpp | 2 +- .../stream/hipStreamCreateWithFlags.cpp | 2 +- .../complex_loading_behavior.cpp | 2 +- tests/src/gcc/LaunchKernel.c | 8 +-- tests/src/gcc/hipMalloc.c | 8 +-- tests/src/hiprtc/hiprtcGetLoweredName.cpp | 2 +- tests/src/hiprtc/saxpy.cpp | 2 +- tests/src/p2p/hipPeerToPeer_simple.cpp | 6 +- .../runtimeApi/event/hipEventElapsedTime.cpp | 2 +- tests/src/runtimeApi/event/hipEventIpc.cpp | 2 +- .../memory/hipMemcpyNegetiveTests.cpp | 2 +- .../runtimeApi/memory/p2p_copy_coherency.cpp | 2 +- .../module/hipExtModuleLaunchKernel.cpp | 2 +- .../module/hipModuleLoadDataMultThreaded.cpp | 2 +- .../module/hipModuleTexture2dDrv.cpp | 2 +- tests/src/runtimeApi/module/tex2d_kernel.cpp | 2 +- ...upancyMaxActiveBlocksPerMultiprocessor.cpp | 2 +- .../stream/hipStreamAddCallbackCatch.cpp | 2 +- tests/src/surface/hipSurfaceObj2D.cpp | 2 +- tests/src/texture/hipBindTex2DPitch.cpp | 2 +- tests/src/texture/hipBindTexRef1DFetch.cpp | 2 +- .../texture/hipNormalizedFloatValueTex.cpp | 2 +- tests/src/texture/hipTex1DFetchCheckModes.cpp | 2 +- tests/src/texture/hipTextureRef2D.cpp | 2 +- tests/src/texture/simpleTexture2DLayered.cpp | 2 +- tests/src/texture/simpleTexture3D.cpp | 2 +- 71 files changed, 155 insertions(+), 149 deletions(-) rename {vdi => rocclr}/CMakeLists.txt (79%) rename {vdi => rocclr}/cl_gl.cpp (100%) rename {vdi => rocclr}/cl_gl_amd.hpp (100%) rename {vdi => rocclr}/cl_lqdflash_amd.cpp (100%) rename {vdi => rocclr}/cl_lqdflash_amd.h (100%) rename {vdi => rocclr}/fixme.cpp (100%) rename {vdi => rocclr}/hip_activity.cpp (100%) rename {vdi => rocclr}/hip_context.cpp (100%) rename {vdi => rocclr}/hip_conversions.hpp (100%) rename {vdi => rocclr}/hip_device.cpp (100%) rename {vdi => rocclr}/hip_device_runtime.cpp (100%) rename {vdi => rocclr}/hip_error.cpp (100%) rename {vdi => rocclr}/hip_event.cpp (100%) rename {vdi => rocclr}/hip_event.hpp (100%) rename {vdi => rocclr}/hip_formatting.hpp (100%) rename {vdi => rocclr}/hip_hcc.def.in (100%) rename {vdi => rocclr}/hip_hcc.map.in (100%) rename {vdi => rocclr}/hip_hcc.rc (100%) rename {vdi => rocclr}/hip_intercept.cpp (100%) rename {vdi => rocclr}/hip_internal.hpp (100%) rename {vdi => rocclr}/hip_memory.cpp (100%) rename {vdi => rocclr}/hip_module.cpp (99%) rename {vdi => rocclr}/hip_peer.cpp (100%) rename {vdi => rocclr}/hip_platform.cpp (100%) rename {vdi => rocclr}/hip_platform.hpp (100%) rename {vdi => rocclr}/hip_prof_api.h (100%) rename {vdi => rocclr}/hip_prof_gen.py (100%) rename {vdi => rocclr}/hip_profile.cpp (100%) rename {vdi => rocclr}/hip_rtc.cpp (100%) rename {vdi => rocclr}/hip_stream.cpp (100%) rename {vdi => rocclr}/hip_surface.cpp (100%) rename {vdi => rocclr}/hip_texture.cpp (99%) rename {vdi => rocclr}/hiprtc_internal.hpp (100%) rename {vdi => rocclr}/trace_helper.h (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8048f712e1..28f9daf015 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,8 +1,8 @@ cmake_minimum_required(VERSION 3.4.3) project(hip) # sample command for hip-rocclr, you'll need to have rocclr installed -# cmake -DHIP_COMPILER=clang -DHIP_PLATFORM=vdi .. -# cmake -DHIP_COMPILER=clang -DHIP_PLATFORM=vdi -DVDI_DIR=/extra/lmoriche/hip-rocclr/rocclr -DOPENCL_DIR=/extra/lmoriche/clients/lmoriche_opencl_dev2/drivers/opencl/api/opencl -DLIBVDI_STATIC_DIR=/extra/lmoriche/hip-rocclr/build/rocclr .. +# cmake -DHIP_COMPILER=clang -DHIP_PLATFORM=rocclr .. +# cmake -DHIP_COMPILER=clang -DHIP_PLATFORM=rocclr -DROCclr_DIR=/extra/lmoriche/hip-rocclr/rocclr -DOPENCL_DIR=/extra/lmoriche/clients/lmoriche_opencl_dev2/drivers/opencl/api/opencl -DLIBROCclr_STATIC_DIR=/extra/lmoriche/hip-rocclr/build/rocclr .. set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake") @@ -123,19 +123,19 @@ message(STATUS "HIP Compiler: " ${HIP_COMPILER}) add_to_config(_buildInfo HIP_COMPILER) # Determine HIP_RUNTIME -# Either HCC or VDI; default is HCC +# Either HCC or ROCclr; default is HCC if(NOT DEFINED ENV{HIP_RUNTIME}) if(HIP_PLATFORM STREQUAL "hcc") set(HIP_RUNTIME "HCC" CACHE STRING "HIP Runtime") - elseif (HIP_PLATFORM STREQUAL "vdi") - set(HIP_RUNTIME "VDI" CACHE STRING "HIP Runtime") + elseif (HIP_PLATFORM STREQUAL "rocclr") + set(HIP_RUNTIME "ROCclr" CACHE STRING "HIP Runtime") elseif (HIP_PLATFORM STREQUAL "nvcc") set(HIP_RUNTIME "CUDA" CACHE STRING "HIP Runtime") endif() endif() add_to_config(_buildInfo HIP_RUNTIME) -if(HIP_PLATFORM STREQUAL "vdi") +if(HIP_PLATFORM STREQUAL "rocclr") set(USE_PROF_API "1") endif() @@ -283,7 +283,7 @@ endif() # add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/lpl_ca) #endif () -if(HIP_PLATFORM STREQUAL "vdi") +if(HIP_PLATFORM STREQUAL "rocclr") # Determine HSA_PATH if(NOT DEFINED HSA_PATH) if(NOT DEFINED ENV{HSA_PATH}) @@ -299,14 +299,14 @@ if(HIP_PLATFORM STREQUAL "vdi") endif() include_directories(${PROJECT_SOURCE_DIR}/include) - add_subdirectory(vdi) + add_subdirectory(rocclr) file(WRITE "${PROJECT_BINARY_DIR}/.hipInfo" ${_buildInfo}) -# set(VDI_CXX_FLAGS "-hc -fno-gpu-rdc --amdgpu-target=gfx803 --amdgpu-target=gfx900 --amdgpu-target=gfx906 --amdgpu-target=gfx908 ") - set(HIP_VDI_BUILD_FLAGS "${HIP_VDI_BUILD_FLAGS} -fPIC ${VDI_CXX_FLAGS} -I${HSA_PATH}/include") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${HIP_VDI_BUILD_FLAGS}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${HIP_VDI_BUILD_FLAGS}") +# set(ROCclr_CXX_FLAGS "-hc -fno-gpu-rdc --amdgpu-target=gfx803 --amdgpu-target=gfx900 --amdgpu-target=gfx906 --amdgpu-target=gfx908 ") + set(HIP_ROCclr_BUILD_FLAGS "${HIP_ROCclr_BUILD_FLAGS} -fPIC ${ROCclr_CXX_FLAGS} -I${HSA_PATH}/include") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${HIP_ROCclr_BUILD_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${HIP_ROCclr_BUILD_FLAGS}") set(HCC_CXX_FLAGS "-hc -fno-gpu-rdc --amdgpu-target=gfx803 --amdgpu-target=gfx900 --amdgpu-target=gfx906 --amdgpu-target=gfx908 ") set(HIP_HCC_BUILD_FLAGS "${HIP_HCC_BUILD_FLAGS} -fPIC ${HCC_CXX_FLAGS} -I${HSA_PATH}/include") @@ -408,7 +408,7 @@ if(HIP_PLATFORM STREQUAL "hcc") file(WRITE "${PROJECT_BINARY_DIR}/.hipInfo" ${_buildInfo}) endif() -if(HIP_PLATFORM STREQUAL "hcc" OR HIP_PLATFORM STREQUAL "vdi") +if(HIP_PLATFORM STREQUAL "hcc" OR HIP_PLATFORM STREQUAL "rocclr") add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/lpl_ca) endif() # Generate .hipVersion @@ -443,7 +443,7 @@ if(HIP_PLATFORM STREQUAL "hcc") endif() # Install .hipInfo -if(HIP_PLATFORM STREQUAL "hcc" OR HIP_PLATFORM STREQUAL "vdi") +if(HIP_PLATFORM STREQUAL "hcc" OR HIP_PLATFORM STREQUAL "rocclr") install(FILES ${PROJECT_BINARY_DIR}/.hipInfo DESTINATION lib) endif() @@ -475,8 +475,8 @@ install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/hip if(HIP_PLATFORM STREQUAL "hcc") install(TARGETS hip_hcc_static hip_hcc host device EXPORT hip-targets DESTINATION ${LIB_INSTALL_DIR}) install(EXPORT hip-targets DESTINATION ${CONFIG_PACKAGE_INSTALL_DIR} NAMESPACE hip::) -elseif( HIP_PLATFORM STREQUAL "vdi") -# install(TARGETS hip_on_vdi host device EXPORT hip-targets DESTINATION ${LIB_INSTALL_DIR}) +elseif( HIP_PLATFORM STREQUAL "rocclr") +# install(TARGETS hip_on_rocclr host device EXPORT hip-targets DESTINATION ${LIB_INSTALL_DIR}) endif() include(CMakePackageConfigHelpers) @@ -539,7 +539,7 @@ if(HIP_PLATFORM STREQUAL "hcc") WORKING_DIRECTORY ${BUILD_DIR} DEPENDS hip_hcc hip_hcc_static hiprtc) else() - set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/vdi) + set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/rocclr) configure_file(packaging/hip-rocclr.txt ${BUILD_DIR}/CMakeLists.txt @ONLY) configure_file(packaging/hip-rocclr.postinst ${BUILD_DIR}/postinst @ONLY) configure_file(packaging/hip-rocclr.prerm ${BUILD_DIR}/prerm @ONLY) @@ -600,7 +600,7 @@ if(HIP_PLATFORM STREQUAL "hcc") COMMAND bash ${PROJECT_BINARY_DIR}/fixnames WORKING_DIRECTORY ${PROJECT_BINARY_DIR} DEPENDS pkg_hip_base pkg_hip_hcc pkg_hip_nvcc pkg_hip_doc pkg_hip_samples) -elseif(HIP_PLATFORM STREQUAL "vdi") +elseif(HIP_PLATFORM STREQUAL "rocclr") add_custom_target(package COMMAND bash ${PROJECT_BINARY_DIR}/fixnames WORKING_DIRECTORY ${PROJECT_BINARY_DIR} diff --git a/bin/hipcc b/bin/hipcc index 87ac826b3c..d5ae94f9c3 100755 --- a/bin/hipcc +++ b/bin/hipcc @@ -26,7 +26,7 @@ use Cwd 'abs_path'; # script's abs_path). Used on AMD platforms only. # HSA_PATH : Path to HSA dir (defaults to ../../hsa relative to abs_path # of this script). Used on AMD platforms only. -# HIP_VDI_HOME : Path to HIP/VDI directory. Used on AMD platforms only. +# HIP_ROCclr_HOME : Path to HIP/ROCclr directory. Used on AMD platforms only. # HIP_CLANG_PATH : Path to HIP-Clang (default to ../../llvm/bin relative to this # script's abs_path). Used on AMD platforms only. @@ -82,15 +82,15 @@ if (-e "$HIP_PATH/../.info/version") { } else { $ROCM_PATH=$ENV{'ROCM_PATH'} // "/opt/rocm"; } -$HIP_VDI_HOME=$ENV{'HIP_VDI_HOME'}; +$HIP_ROCclr_HOME=$ENV{'HIP_ROCclr_HOME'}; $HIP_LIB_PATH=$ENV{'HIP_LIB_PATH'}; $HIP_CLANG_PATH=$ENV{'HIP_CLANG_PATH'}; $DEVICE_LIB_PATH=$ENV{'DEVICE_LIB_PATH'}; $HIP_CLANG_HCC_COMPAT_MODE=$ENV{'HIP_CLANG_HCC_COMPAT_MODE'}; # HCC compatibility mode $HIP_COMPILE_CXX_AS_HIP=$ENV{'HIP_COMPILE_CXX_AS_HIP'} // "1"; -if (defined $HIP_VDI_HOME) { - $HIP_INFO_PATH= "$HIP_VDI_HOME/lib/.hipInfo"; +if (defined $HIP_ROCclr_HOME) { + $HIP_INFO_PATH= "$HIP_ROCclr_HOME/lib/.hipInfo"; } else { $HIP_INFO_PATH= "$HIP_PATH/lib/.hipInfo"; # use actual file } @@ -130,28 +130,28 @@ $HIP_COMPILER= `$HIP_PATH/bin/hipconfig --compiler`; #HIP_RUNTIME controls whether to use HCC, VDI, or NVCC as the runtime: $HIP_RUNTIME= `$HIP_PATH/bin/hipconfig --runtime`; -# If using VDI runtime, need to find HIP_VDI_HOME -if (defined $HIP_RUNTIME and $HIP_RUNTIME eq "VDI" and !defined $HIP_VDI_HOME) { +# If using ROCclr runtime, need to find HIP_ROCclr_HOME +if (defined $HIP_RUNTIME and $HIP_RUNTIME eq "ROCclr" and !defined $HIP_ROCclr_HOME) { my $hipcc_dir = dirname($0); if (-e "$hipcc_dir/../lib/bitcode") { - $HIP_VDI_HOME = abs_path($hipcc_dir . "/.."); + $HIP_ROCclr_HOME = abs_path($hipcc_dir . "/.."); } else { - $HIP_VDI_HOME = $HIP_PATH; # use HIP_PATH + $HIP_ROCclr_HOME = $HIP_PATH; # use HIP_PATH } - $HIPCXXFLAGS .= "-D__HIP_VDI__"; - $HIPCFLAGS .= "-D__HIP_VDI__"; + $HIPCXXFLAGS .= "-D__HIP_ROCclr__"; + $HIPCFLAGS .= "-D__HIP_ROCclr__"; } -if (defined $HIP_VDI_HOME) { - if (!defined $HIP_CLANG_PATH and (-e "$HIP_VDI_HOME/bin/clang" or -e "$HIP_VDI_HOME/bin/clang.exe")) { - $HIP_CLANG_PATH = "$HIP_VDI_HOME/bin"; +if (defined $HIP_ROCclr_HOME) { + if (!defined $HIP_CLANG_PATH and (-e "$HIP_ROCclr_HOME/bin/clang" or -e "$HIP_ROCclr_HOME/bin/clang.exe")) { + $HIP_CLANG_PATH = "$HIP_ROCclr_HOME/bin"; } - if (!defined $DEVICE_LIB_PATH and -e "$HIP_VDI_HOME/lib/bitcode") { - $DEVICE_LIB_PATH = "$HIP_VDI_HOME/lib/bitcode"; + if (!defined $DEVICE_LIB_PATH and -e "$HIP_ROCclr_HOME/lib/bitcode") { + $DEVICE_LIB_PATH = "$HIP_ROCclr_HOME/lib/bitcode"; } - $HIP_INCLUDE_PATH = "$HIP_VDI_HOME/include"; + $HIP_INCLUDE_PATH = "$HIP_ROCclr_HOME/include"; if (!defined $HIP_LIB_PATH) { - $HIP_LIB_PATH = "$HIP_VDI_HOME/lib"; + $HIP_LIB_PATH = "$HIP_ROCclr_HOME/lib"; } } @@ -199,8 +199,8 @@ if ($HIP_PLATFORM eq "hcc" and $HIP_COMPILER eq "clang") { $HIP_LIB_PATH = "$HIP_PATH/lib"; } if ($verbose & 0x2) { - if (defined $HIP_VDI_HOME) { - print ("HIP_VDI_HOME=$HIP_VDI_HOME\n"); + if (defined $HIP_ROCclr_HOME) { + print ("HIP_ROCclr_HOME=$HIP_ROCclr_HOME\n"); } print ("HIP_CLANG_PATH=$HIP_CLANG_PATH\n"); print ("HIP_CLANG_INCLUDE_PATH=$HIP_CLANG_INCLUDE_PATH\n"); @@ -229,8 +229,8 @@ if ($HIP_PLATFORM eq "hcc" and $HIP_COMPILER eq "clang") { $HIPCXXFLAGS .= " -isystem $HSA_PATH/include"; $HIPCFLAGS .= " -isystem $HSA_PATH/include"; if ($HIP_RUNTIME ne "HCC" ) { - $HIPCXXFLAGS .= " -D__HIP_VDI__"; - $HIPCFLAGS .= " -D__HIP_VDI__"; + $HIPCXXFLAGS .= " -D__HIP_ROCclr__"; + $HIPCFLAGS .= " -D__HIP_ROCclr__"; } } elsif ($HIP_PLATFORM eq "hcc" and $HIP_COMPILER eq "hcc") { diff --git a/bin/hipconfig b/bin/hipconfig index b3414213f2..03b412421b 100755 --- a/bin/hipconfig +++ b/bin/hipconfig @@ -33,7 +33,7 @@ if ($p_help) { print " --cpp_config, -C : print C++ compiler options\n"; print " --compiler, -c : print compiler (hcc or clang or nvcc)\n"; print " --platform, -P : print platform (hcc or nvcc)\n"; - print " --runtime, -r : print runtime (HCC or VDI)\n"; + print " --runtime, -r : print runtime (HCC or ROCclr)\n"; print " --full, -f : print full config\n"; print " --version, -v : print hip version\n"; print " --check : check configuration\n"; diff --git a/hip-config.cmake.in b/hip-config.cmake.in index 859e2fa0fc..9ff5832cd9 100644 --- a/hip-config.cmake.in +++ b/hip-config.cmake.in @@ -111,14 +111,14 @@ set_target_properties(hip::host PROPERTIES INTERFACE_COMPILE_DEFINITIONS "__HIP_PLATFORM_HCC__=1" ) -if(HIP_RUNTIME MATCHES "VDI") +if(HIP_RUNTIME MATCHES "ROCclr") set_target_properties(hip::amdhip64 PROPERTIES - INTERFACE_COMPILE_DEFINITIONS "__HIP_VDI__=1" + INTERFACE_COMPILE_DEFINITIONS "__HIP_ROCclr__=1" INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;${HSA_HEADER}" INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;${HSA_HEADER}" ) set_target_properties(hip::device PROPERTIES - INTERFACE_COMPILE_DEFINITIONS "__HIP_VDI__=1" + INTERFACE_COMPILE_DEFINITIONS "__HIP_ROCclr__=1" INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/../include" INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/../include" ) diff --git a/include/hip/hcc_detail/channel_descriptor.h b/include/hip/hcc_detail/channel_descriptor.h index a69558c8e4..417451fb85 100644 --- a/include/hip/hcc_detail/channel_descriptor.h +++ b/include/hip/hcc_detail/channel_descriptor.h @@ -29,12 +29,12 @@ THE SOFTWARE. #ifdef __cplusplus -#if __HIP_VDI__ +#if __HIP_ROCclr__ extern "C" { #endif HIP_PUBLIC_API hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannelFormatKind f); -#if __HIP_VDI__ +#if __HIP_ROCclr__ } #endif diff --git a/include/hip/hcc_detail/device_functions.h b/include/hip/hcc_detail/device_functions.h index 76ac8710d5..eaee437cea 100644 --- a/include/hip/hcc_detail/device_functions.h +++ b/include/hip/hcc_detail/device_functions.h @@ -34,7 +34,7 @@ THE SOFTWARE. #include #include -#if __HIP_CLANG_ONLY__ && __HIP_VDI__ && !_WIN32 +#if __HIP_CLANG_ONLY__ && __HIP_ROCclr__ && !_WIN32 extern "C" __device__ int printf(const char *fmt, ...); #else #if HC_FEATURE_PRINTF @@ -46,7 +46,7 @@ static inline __device__ void printf(const char* format, All... all) { template static inline __device__ void printf(const char* format, All... all) {} #endif // HC_FEATURE_PRINTF -#endif // __HIP_CLANG_ONLY__ && __HIP_VDI__ +#endif // __HIP_CLANG_ONLY__ && __HIP_ROCclr__ /* Integer Intrinsics diff --git a/include/hip/hcc_detail/hip_runtime_api.h b/include/hip/hcc_detail/hip_runtime_api.h index 47b84bd022..7363f904ed 100644 --- a/include/hip/hcc_detail/hip_runtime_api.h +++ b/include/hip/hcc_detail/hip_runtime_api.h @@ -35,8 +35,8 @@ THE SOFTWARE. #define GENERIC_GRID_LAUNCH 1 #endif -#ifndef __HIP_VDI__ -#define __HIP_VDI__ 0 +#ifndef __HIP_ROCclr__ +#define __HIP_ROCclr__ 0 #endif #include @@ -44,7 +44,7 @@ THE SOFTWARE. #include #include -#if !__HIP_VDI__ && defined(__cplusplus) +#if !__HIP_ROCclr__ && defined(__cplusplus) #include #include #endif @@ -105,7 +105,7 @@ typedef struct hipIpcMemHandle_st { char reserved[HIP_IPC_HANDLE_SIZE]; } hipIpcMemHandle_t; -#if __HIP_VDI__ +#if __HIP_ROCclr__ // TODO: IPC event handle currently unsupported struct ihipIpcEventHandle_t; typedef struct ihipIpcEventHandle_t* hipIpcEventHandle_t; @@ -1483,7 +1483,7 @@ hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t sizeBytes, h hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream); -#if __HIP_VDI__ +#if __HIP_ROCclr__ hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod, const char* name); @@ -1700,7 +1700,7 @@ hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName, } #endif // End : Not supported in gcc -#endif // __HIP_VDI__ +#endif // __HIP_ROCclr__ /** * @brief Copy data from src to dst asynchronously. * @@ -2844,7 +2844,7 @@ hipError_t hipFuncGetAttributes(struct hipFuncAttributes* attr, const void* func */ hipError_t hipFuncGetAttribute(int* value, hipFunction_attribute attrib, hipFunction_t hfunc); -#if !__HIP_VDI__ +#if !__HIP_ROCclr__ #if defined(__cplusplus) } // extern "C" #endif @@ -2899,7 +2899,7 @@ extern "C" { */ hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod, const char* name); -#endif // __HIP_VDI__ +#endif // __HIP_ROCclr__ hipError_t hipModuleGetTexRef(textureReference** texRef, hipModule_t hmod, const char* name); @@ -2960,7 +2960,7 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX, unsigne void** kernelParams, void** extra); -#if __HIP_VDI__ && !defined(__HCC__) +#if __HIP_ROCclr__ && !defined(__HCC__) /** * @brief launches kernel f with launch parameters and shared memory on stream with arguments passed * to kernelparams or extra, where thread blocks can cooperate and synchronize as they execute @@ -3345,7 +3345,7 @@ hipError_t hipLaunchKernel(const void* function_address, size_t sharedMemBytes __dparm(0), hipStream_t stream __dparm(0)); -#if __HIP_VDI__ +#if __HIP_ROCclr__ hipError_t hipBindTexture( size_t* offset, const textureReference* tex, @@ -3646,12 +3646,12 @@ inline hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( class TlsData; -#if !__HIP_VDI__ +#if !__HIP_ROCclr__ hipError_t hipBindTexture(size_t* offset, textureReference* tex, const void* devPtr, const hipChannelFormatDesc* desc, size_t size = UINT_MAX); #endif -#if !__HIP_VDI__ +#if !__HIP_ROCclr__ hipError_t ihipBindTextureImpl(TlsData *tls, int dim, enum hipTextureReadMode readMode, size_t* offset, const void* devPtr, const struct hipChannelFormatDesc* desc, size_t size, textureReference* tex); @@ -3672,7 +3672,7 @@ hipError_t ihipBindTextureImpl(TlsData *tls, int dim, enum hipTextureReadMode re * @param[in] size - Size of the memory area pointed to by devPtr * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown **/ -#if !__HIP_VDI__ +#if !__HIP_ROCclr__ template hipError_t hipBindTexture(size_t* offset, struct texture& tex, const void* devPtr, const struct hipChannelFormatDesc& desc, size_t size = UINT_MAX) { @@ -3694,7 +3694,7 @@ hipError_t hipBindTexture(size_t* offset, struct texture& tex, * @param[in] size - Size of the memory area pointed to by devPtr * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown **/ -#if !__HIP_VDI__ +#if !__HIP_ROCclr__ template hipError_t hipBindTexture(size_t* offset, struct texture& tex, const void* devPtr, size_t size = UINT_MAX) { @@ -3703,19 +3703,19 @@ hipError_t hipBindTexture(size_t* offset, struct texture& tex, #endif // C API -#if !__HIP_VDI__ +#if !__HIP_ROCclr__ hipError_t hipBindTexture2D(size_t* offset, textureReference* tex, const void* devPtr, const hipChannelFormatDesc* desc, size_t width, size_t height, size_t pitch); #endif -#if !__HIP_VDI__ +#if !__HIP_ROCclr__ hipError_t ihipBindTexture2DImpl(int dim, enum hipTextureReadMode readMode, size_t* offset, const void* devPtr, const struct hipChannelFormatDesc* desc, size_t width, size_t height, textureReference* tex, size_t pitch); #endif -#if !__HIP_VDI__ +#if !__HIP_ROCclr__ template hipError_t hipBindTexture2D(size_t* offset, struct texture& tex, const void* devPtr, size_t width, size_t height, size_t pitch) { @@ -3724,7 +3724,7 @@ hipError_t hipBindTexture2D(size_t* offset, struct texture& te } #endif -#if !__HIP_VDI__ +#if !__HIP_ROCclr__ template hipError_t hipBindTexture2D(size_t* offset, struct texture& tex, const void* devPtr, const struct hipChannelFormatDesc& desc, @@ -3734,26 +3734,26 @@ hipError_t hipBindTexture2D(size_t* offset, struct texture& te #endif // C API -#if !__HIP_VDI__ +#if !__HIP_ROCclr__ hipError_t hipBindTextureToArray(textureReference* tex, hipArray_const_t array, const hipChannelFormatDesc* desc); #endif -#if !__HIP_VDI__ +#if !__HIP_ROCclr__ hipError_t ihipBindTextureToArrayImpl(TlsData *tls, int dim, enum hipTextureReadMode readMode, hipArray_const_t array, const struct hipChannelFormatDesc& desc, textureReference* tex); #endif -#if !__HIP_VDI__ +#if !__HIP_ROCclr__ template hipError_t hipBindTextureToArray(struct texture& tex, hipArray_const_t array) { return ihipBindTextureToArrayImpl(nullptr, dim, readMode, array, tex.channelDesc, &tex); } #endif -#if !__HIP_VDI__ +#if !__HIP_ROCclr__ template hipError_t hipBindTextureToArray(struct texture& tex, hipArray_const_t array, const struct hipChannelFormatDesc& desc) { @@ -3761,7 +3761,7 @@ hipError_t hipBindTextureToArray(struct texture& tex, hipArray } #endif -#if !__HIP_VDI__ +#if !__HIP_ROCclr__ template inline static hipError_t hipBindTextureToArray(struct texture *tex, hipArray_const_t array, @@ -3771,13 +3771,13 @@ inline static hipError_t hipBindTextureToArray(struct texture #endif // C API -#if !__HIP_VDI__ +#if !__HIP_ROCclr__ hipError_t hipBindTextureToMipmappedArray(const textureReference* tex, hipMipmappedArray_const_t mipmappedArray, const hipChannelFormatDesc* desc); #endif -#if !__HIP_VDI__ +#if !__HIP_ROCclr__ template hipError_t hipBindTextureToMipmappedArray(const texture& tex, hipMipmappedArray_const_t mipmappedArray) { @@ -3785,7 +3785,7 @@ hipError_t hipBindTextureToMipmappedArray(const texture& tex, } #endif -#if !__HIP_VDI__ +#if !__HIP_ROCclr__ template hipError_t hipBindTextureToMipmappedArray(const texture& tex, hipMipmappedArray_const_t mipmappedArray, @@ -3794,7 +3794,7 @@ hipError_t hipBindTextureToMipmappedArray(const texture& tex, } #endif -#if __HIP_VDI__ && !defined(__HCC__) +#if __HIP_ROCclr__ && !defined(__HCC__) template inline hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, @@ -3831,22 +3831,22 @@ inline hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchPara * * @return #hipSuccess **/ -#if !__HIP_VDI__ +#if !__HIP_ROCclr__ hipError_t hipUnbindTexture(const textureReference* tex); #endif -#if !__HIP_VDI__ +#if !__HIP_ROCclr__ extern hipError_t ihipUnbindTextureImpl(const hipTextureObject_t& textureObject); #endif -#if !__HIP_VDI__ +#if !__HIP_ROCclr__ template hipError_t hipUnbindTexture(struct texture& tex) { return ihipUnbindTextureImpl(tex.textureObject); } #endif -#if !__HIP_VDI__ +#if !__HIP_ROCclr__ hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array); hipError_t hipGetTextureAlignmentOffset(size_t* offset, const textureReference* texref); hipError_t hipGetTextureReference(const textureReference** texref, const void* symbol); @@ -3890,7 +3890,7 @@ hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject, const hipReso hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject); -#if __HIP_VDI__ +#if __HIP_ROCclr__ template static inline hipError_t hipBindTexture( size_t *offset, diff --git a/include/hip/hcc_detail/hip_runtime_prof.h b/include/hip/hcc_detail/hip_runtime_prof.h index 4d4eccb54d..ffd8b0ab8e 100644 --- a/include/hip/hcc_detail/hip_runtime_prof.h +++ b/include/hip/hcc_detail/hip_runtime_prof.h @@ -23,7 +23,7 @@ THE SOFTWARE. #ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_RUNTIME_PROF_H #define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_RUNTIME_PROF_H -// HIP VDI Op IDs enumeration +// HIP ROCclr Op IDs enumeration enum HipVdiOpId { kHipVdiOpIdDispatch = 0, kHipVdiOpIdCopy = 1, @@ -31,7 +31,7 @@ enum HipVdiOpId { kHipVdiOpIdNumber = 3 }; -// Types of VDI commands +// Types of ROCclr commands enum HipVdiCommandKind { kHipVdiCommandKernel = 0x11F0, kHipVdiMemcpyDeviceToHost = 0x11F3, diff --git a/include/hip/hip_ext.h b/include/hip/hip_ext.h index 9b54f7fa57..90d1e34d2d 100644 --- a/include/hip/hip_ext.h +++ b/include/hip/hip_ext.h @@ -109,7 +109,7 @@ hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, hipEvent_t stopEvent = nullptr) __attribute__((deprecated("use hipExtModuleLaunchKernel instead"))); -//#if !__HIP_VDI__ && defined(__cplusplus) +//#if !__HIP_ROCclr__ && defined(__cplusplus) #if defined(__HIP_PLATFORM_HCC__) && GENERIC_GRID_LAUNCH == 1 && defined(__HCC__) //kernel_descriptor and hip_impl::make_kernarg are in "grid_launch_GGL.hpp" @@ -163,7 +163,7 @@ void hipExtLaunchKernelGGL(F kernel, const dim3& numBlocks, stream, startEvent, stopEvent, flags, &config[0]); } -#endif // !__HIP_VDI__ && defined(__cplusplus) +#endif // !__HIP_ROCclr__ && defined(__cplusplus) // doxygen end AMD-specific features /** diff --git a/vdi/CMakeLists.txt b/rocclr/CMakeLists.txt similarity index 79% rename from vdi/CMakeLists.txt rename to rocclr/CMakeLists.txt index 5defe45ac1..ef30ddb8aa 100644 --- a/vdi/CMakeLists.txt +++ b/rocclr/CMakeLists.txt @@ -15,7 +15,7 @@ set(CONFIG_PACKAGE_INSTALL_DIR ${LIB_INSTALL_DIR}/cmake/hip) find_package(PythonInterp REQUIRED) -add_definitions(-D__HIP_VDI__ -D__HIP_PLATFORM_HCC__ -DLINUX -D__x86_64__ -D__AMD64__ -DUNIX_OS -DqLittleEndian -DOPENCL_MAJOR=2 -DOPENCL_MINOR=0 -DCL_TARGET_OPENCL_VERSION=220 -DWITH_AQL -DWITH_ONLINE_COMPILER -DATI_OS_LINUX -DATI_ARCH_X86 -DLITTLEENDIAN_CPU -DATI_BITS_64 -DATI_COMP_GCC -DWITH_HSA_DEVICE -DWITH_TARGET_AMDGCN -DOPENCL_EXPORTS -DCL_USE_DEPRECATED_OPENCL_1_0_APIS -DCL_USE_DEPRECATED_OPENCL_1_1_APIS -DCL_USE_DEPRECATED_OPENCL_1_2_APIS -DCL_USE_DEPRECATED_OPENCL_2_0_APIS -DVEGA10_ONLY=false -DWITH_LIGHTNING_COMPILER -DUSE_PROF_API) +add_definitions(-D__HIP_ROCclr__ -D__HIP_PLATFORM_HCC__ -DLINUX -D__x86_64__ -D__AMD64__ -DUNIX_OS -DqLittleEndian -DOPENCL_MAJOR=2 -DOPENCL_MINOR=0 -DCL_TARGET_OPENCL_VERSION=220 -DWITH_AQL -DWITH_ONLINE_COMPILER -DATI_OS_LINUX -DATI_ARCH_X86 -DLITTLEENDIAN_CPU -DATI_BITS_64 -DATI_COMP_GCC -DWITH_HSA_DEVICE -DWITH_TARGET_AMDGCN -DOPENCL_EXPORTS -DCL_USE_DEPRECATED_OPENCL_1_0_APIS -DCL_USE_DEPRECATED_OPENCL_1_1_APIS -DCL_USE_DEPRECATED_OPENCL_1_2_APIS -DCL_USE_DEPRECATED_OPENCL_2_0_APIS -DVEGA10_ONLY=false -DWITH_LIGHTNING_COMPILER -DUSE_PROF_API) if(CMAKE_BUILD_TYPE MATCHES "^Debug$") add_definitions(-DDEBUG) @@ -27,23 +27,23 @@ endif() set(USE_PROF_API "1") -if(NOT DEFINED LIBVDI_STATIC_DIR) - find_path(LIBVDI_STATIC_DIR - NAMES libamdvdi_static.a +if(NOT DEFINED LIBROCclr_STATIC_DIR) + find_path(LIBROCclr_STATIC_DIR + NAMES libamdrocclr_static.a PATHS /opt/rocm/rocclr PATH_SUFFIXES lib ) endif() -if(NOT DEFINED VDI_DIR) - find_path(VDI_DIR +if(NOT DEFINED ROCclr_DIR) + find_path(ROCclr_DIR NAMES top.hpp PATH_SUFFIXES include PATHS /opt/rocm/rocclr ) endif() -message("Found Static rocclr lib:${LIBVDI_STATIC_DIR} and rocclr includes: ${VDI_DIR}") -set(PROF_API_HEADER_PATH ${VDI_DIR}/platform) +message("Found Static rocclr lib:${LIBROCclr_STATIC_DIR} and rocclr includes: ${ROCclr_DIR}") +set(PROF_API_HEADER_PATH ${ROCclr_DIR}/platform) ############################# # Profiling API support ############################# @@ -85,8 +85,8 @@ if(USE_PROF_API EQUAL 1) endif() -if(NOT DEFINED VDI_DIR OR NOT DEFINED LIBOCL_STATIC_DIR OR NOT DEFINED LIBVDI_STATIC_DIR ) - # message(FATAL_ERROR "define VDI_DIR, LIBOCL_STATIC_DIR\n") +if(NOT DEFINED ROCclr_DIR OR NOT DEFINED LIBOCL_STATIC_DIR OR NOT DEFINED LIBROCclr_STATIC_DIR ) + # message(FATAL_ERROR "define ROCclr_DIR, LIBOCL_STATIC_DIR\n") endif() list ( APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules" ) @@ -104,12 +104,12 @@ include_directories(${PROJECT_BINARY_DIR}/include) include_directories(${CMAKE_SOURCE_DIR}/elfio) include_directories(${CMAKE_SOURCE_DIR}/amdocl) include_directories(${CMAKE_SOURCE_DIR}/include/hip/hcc_detail/elfio) -include_directories(${VDI_DIR}) -include_directories(${VDI_DIR}/include) -include_directories(${VDI_DIR}/compiler/lib) -include_directories(${VDI_DIR}/compiler/lib/include) -include_directories(${VDI_DIR}/elf/utils/common) -include_directories(${VDI_DIR}/elf/utils/libelf) +include_directories(${ROCclr_DIR}) +include_directories(${ROCclr_DIR}/include) +include_directories(${ROCclr_DIR}/compiler/lib) +include_directories(${ROCclr_DIR}/compiler/lib/include) +include_directories(${ROCclr_DIR}/elf/utils/common) +include_directories(${ROCclr_DIR}/elf/utils/libelf) add_definitions(-DUSE_COMGR_LIBRARY -DCOMGR_DYN_DLL) find_package(amd_comgr REQUIRED CONFIG PATHS @@ -157,7 +157,7 @@ add_dependencies(hip64 gen-prof-api-str-header) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -include(${LIBVDI_STATIC_DIR}/amdvdi_staticTargets.cmake) +include(${LIBROCclr_STATIC_DIR}/amdrocclr_staticTargets.cmake) add_library(amdhip64 SHARED $ @@ -195,19 +195,25 @@ target_link_libraries(device INTERFACE host) # TODO: we may create host_static and device_static to let app # link amdhip64_static +<<<<<<< HEAD:vdi/CMakeLists.txt # FIXME: Linux convention is to create static library with same base # filename. target_link_libraries(amdhip64 PRIVATE amdvdi_static Threads::Threads dl) target_link_libraries(amdhip64_static PRIVATE Threads::Threads dl) target_link_libraries(amdhip64_static_temp PRIVATE Threads::Threads dl) +======= +target_link_libraries(amdhip64 PRIVATE amdrocclr_static pthread dl) +target_link_libraries(amdhip64_static PRIVATE pthread dl) +target_link_libraries(amdhip64_static_temp PRIVATE pthread dl) +>>>>>>> bf7c645f... name change vdi to rocclr:rocclr/CMakeLists.txt # combine objects of vid and hip into amdhip64_static add_custom_target( amdhip64_static_combiner ALL COMMAND rm -f $ # Must remove old one, otherwise the new one will contain obsolete stuff - COMMAND ${CMAKE_AR} -rcsT $ $ $ - DEPENDS amdhip64_static amdhip64_static_temp amdvdi_static # To make sure this is the last step + COMMAND ${CMAKE_AR} -rcsT $ $ $ + DEPENDS amdhip64_static amdhip64_static_temp amdrocclr_static # To make sure this is the last step COMMENT "Combining static libs into amdhip64_static" ) diff --git a/vdi/cl_gl.cpp b/rocclr/cl_gl.cpp similarity index 100% rename from vdi/cl_gl.cpp rename to rocclr/cl_gl.cpp diff --git a/vdi/cl_gl_amd.hpp b/rocclr/cl_gl_amd.hpp similarity index 100% rename from vdi/cl_gl_amd.hpp rename to rocclr/cl_gl_amd.hpp diff --git a/vdi/cl_lqdflash_amd.cpp b/rocclr/cl_lqdflash_amd.cpp similarity index 100% rename from vdi/cl_lqdflash_amd.cpp rename to rocclr/cl_lqdflash_amd.cpp diff --git a/vdi/cl_lqdflash_amd.h b/rocclr/cl_lqdflash_amd.h similarity index 100% rename from vdi/cl_lqdflash_amd.h rename to rocclr/cl_lqdflash_amd.h diff --git a/vdi/fixme.cpp b/rocclr/fixme.cpp similarity index 100% rename from vdi/fixme.cpp rename to rocclr/fixme.cpp diff --git a/vdi/hip_activity.cpp b/rocclr/hip_activity.cpp similarity index 100% rename from vdi/hip_activity.cpp rename to rocclr/hip_activity.cpp diff --git a/vdi/hip_context.cpp b/rocclr/hip_context.cpp similarity index 100% rename from vdi/hip_context.cpp rename to rocclr/hip_context.cpp diff --git a/vdi/hip_conversions.hpp b/rocclr/hip_conversions.hpp similarity index 100% rename from vdi/hip_conversions.hpp rename to rocclr/hip_conversions.hpp diff --git a/vdi/hip_device.cpp b/rocclr/hip_device.cpp similarity index 100% rename from vdi/hip_device.cpp rename to rocclr/hip_device.cpp diff --git a/vdi/hip_device_runtime.cpp b/rocclr/hip_device_runtime.cpp similarity index 100% rename from vdi/hip_device_runtime.cpp rename to rocclr/hip_device_runtime.cpp diff --git a/vdi/hip_error.cpp b/rocclr/hip_error.cpp similarity index 100% rename from vdi/hip_error.cpp rename to rocclr/hip_error.cpp diff --git a/vdi/hip_event.cpp b/rocclr/hip_event.cpp similarity index 100% rename from vdi/hip_event.cpp rename to rocclr/hip_event.cpp diff --git a/vdi/hip_event.hpp b/rocclr/hip_event.hpp similarity index 100% rename from vdi/hip_event.hpp rename to rocclr/hip_event.hpp diff --git a/vdi/hip_formatting.hpp b/rocclr/hip_formatting.hpp similarity index 100% rename from vdi/hip_formatting.hpp rename to rocclr/hip_formatting.hpp diff --git a/vdi/hip_hcc.def.in b/rocclr/hip_hcc.def.in similarity index 100% rename from vdi/hip_hcc.def.in rename to rocclr/hip_hcc.def.in diff --git a/vdi/hip_hcc.map.in b/rocclr/hip_hcc.map.in similarity index 100% rename from vdi/hip_hcc.map.in rename to rocclr/hip_hcc.map.in diff --git a/vdi/hip_hcc.rc b/rocclr/hip_hcc.rc similarity index 100% rename from vdi/hip_hcc.rc rename to rocclr/hip_hcc.rc diff --git a/vdi/hip_intercept.cpp b/rocclr/hip_intercept.cpp similarity index 100% rename from vdi/hip_intercept.cpp rename to rocclr/hip_intercept.cpp diff --git a/vdi/hip_internal.hpp b/rocclr/hip_internal.hpp similarity index 100% rename from vdi/hip_internal.hpp rename to rocclr/hip_internal.hpp diff --git a/vdi/hip_memory.cpp b/rocclr/hip_memory.cpp similarity index 100% rename from vdi/hip_memory.cpp rename to rocclr/hip_memory.cpp diff --git a/vdi/hip_module.cpp b/rocclr/hip_module.cpp similarity index 99% rename from vdi/hip_module.cpp rename to rocclr/hip_module.cpp index ec8ae1c440..a09ac63c3c 100755 --- a/vdi/hip_module.cpp +++ b/rocclr/hip_module.cpp @@ -615,7 +615,7 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL // Find the matching device and request the kernel function if (&queue->vdev()->device() == g_devices[dev]->devices()[0]) { func = PlatformState::instance().getFunc(launch.func, dev); - // Save VDI index of the first device in the launch + // Save ROCclr index of the first device in the launch if (i == 0) { firstDevice = queue->vdev()->device().index(); } diff --git a/vdi/hip_peer.cpp b/rocclr/hip_peer.cpp similarity index 100% rename from vdi/hip_peer.cpp rename to rocclr/hip_peer.cpp diff --git a/vdi/hip_platform.cpp b/rocclr/hip_platform.cpp similarity index 100% rename from vdi/hip_platform.cpp rename to rocclr/hip_platform.cpp diff --git a/vdi/hip_platform.hpp b/rocclr/hip_platform.hpp similarity index 100% rename from vdi/hip_platform.hpp rename to rocclr/hip_platform.hpp diff --git a/vdi/hip_prof_api.h b/rocclr/hip_prof_api.h similarity index 100% rename from vdi/hip_prof_api.h rename to rocclr/hip_prof_api.h diff --git a/vdi/hip_prof_gen.py b/rocclr/hip_prof_gen.py similarity index 100% rename from vdi/hip_prof_gen.py rename to rocclr/hip_prof_gen.py diff --git a/vdi/hip_profile.cpp b/rocclr/hip_profile.cpp similarity index 100% rename from vdi/hip_profile.cpp rename to rocclr/hip_profile.cpp diff --git a/vdi/hip_rtc.cpp b/rocclr/hip_rtc.cpp similarity index 100% rename from vdi/hip_rtc.cpp rename to rocclr/hip_rtc.cpp diff --git a/vdi/hip_stream.cpp b/rocclr/hip_stream.cpp similarity index 100% rename from vdi/hip_stream.cpp rename to rocclr/hip_stream.cpp diff --git a/vdi/hip_surface.cpp b/rocclr/hip_surface.cpp similarity index 100% rename from vdi/hip_surface.cpp rename to rocclr/hip_surface.cpp diff --git a/vdi/hip_texture.cpp b/rocclr/hip_texture.cpp similarity index 99% rename from vdi/hip_texture.cpp rename to rocclr/hip_texture.cpp index de62adb402..a9121f183b 100755 --- a/vdi/hip_texture.cpp +++ b/rocclr/hip_texture.cpp @@ -148,7 +148,7 @@ hipError_t ihipCreateTextureObject(hipTextureObject_t* pTexObject, return hipErrorNotSupported; } - // TODO VDI assumes all dimensions have the same addressing mode. + // TODO ROCclr assumes all dimensions have the same addressing mode. cl_addressing_mode addressMode = CL_ADDRESS_NONE; // If hipTextureDesc::normalizedCoords is set to zero, // hipAddressModeWrap and hipAddressModeMirror won't be supported @@ -220,7 +220,7 @@ hipError_t ihipCreateTextureObject(hipTextureObject_t* pTexObject, if ((pResViewDesc != nullptr) || (readMode == hipReadModeNormalizedFloat) || (pTexDesc->sRGB == 1)) { - // TODO VDI currently right now can only change the format of the image. + // TODO ROCclr currently right now can only change the format of the image. const cl_channel_order channelOrder = (pResViewDesc != nullptr) ? hip::getCLChannelOrder(hip::getNumChannels(pResViewDesc->format), pTexDesc->sRGB) : hip::getCLChannelOrder(pResDesc->res.array.array->NumChannels, pTexDesc->sRGB); const cl_channel_type channelType = (pResViewDesc != nullptr) ? hip::getCLChannelType(hip::getArrayFormat(pResViewDesc->format), readMode) : diff --git a/vdi/hiprtc_internal.hpp b/rocclr/hiprtc_internal.hpp similarity index 100% rename from vdi/hiprtc_internal.hpp rename to rocclr/hiprtc_internal.hpp diff --git a/vdi/trace_helper.h b/rocclr/trace_helper.h similarity index 100% rename from vdi/trace_helper.h rename to rocclr/trace_helper.h diff --git a/src/hip_clang.cpp b/src/hip_clang.cpp index 85aa0ad810..7a2e72a20e 100644 --- a/src/hip_clang.cpp +++ b/src/hip_clang.cpp @@ -199,7 +199,7 @@ struct DeviceVar { std::unordered_multimap g_vars; -//The logic follows PlatformState::getGlobalVar in VDI RT +//The logic follows PlatformState::getGlobalVar in ROCclr RT static DeviceVar* findVar(std::string hostVar, int deviceId, hipModule_t hmod) { DeviceVar* dvar = nullptr; if (hmod != nullptr) { @@ -326,7 +326,7 @@ static bool createGlobalVarObj(const hsa_executable_t& hsaExecutable, const hsa_ // global variable in host code. The shadow host variable is used to keep // track of the value of the device side global variable between kernel // executions. -// The basic logic is taken from VDI RT, but there is much difference. +// The basic logic is taken from ROCclr RT, but there is much difference. extern "C" void __hipRegisterVar( std::vector* modules, // The device modules containing code object char* var, // The shadow variable in host code diff --git a/tests/README.md b/tests/README.md index a9401ed918..7e82f66cd6 100644 --- a/tests/README.md +++ b/tests/README.md @@ -46,8 +46,8 @@ In the above, BUILD commands provide instructions on how to build the test case #### BUILD command The supported syntax for the BUILD command is: -``` -BUILD: %t %s HIPCC_OPTIONS HCC_OPTIONS CLANG_OPTIONS NVCC_OPTIONS EXCLUDE_HIP_PLATFORM EXCLUDE_HIP_RUNTIME EXCLUDE_HIP_COMPILER DEPENDS +<<<<<<< HEAD +BUILD: %t %s HIPCC_OPTIONS HCC_OPTIONS CLANG_OPTIONS NVCC_OPTIONS EXCLUDE_HIP_PLATFORM EXCLUDE_HIP_RUNTIME EXCLUDE_HIP_COMPILER DEPENDS ``` %s: refers to current source file name. Additional source files needed for the test can be specified by name (including relative path). %t: refers to target executable named derived by removing the extension from the current source file. Alternatively a target executable name can be specified. @@ -56,7 +56,7 @@ HCC_OPTIONS: All options specified after this delimiter are passed to hipcc on H CLANG_OPTIONS: All options specified after this delimiter are passed to hipcc on HIP-Clang compiler only. NVCC_OPTIONS: All options specified after this delimiter are passed to hipcc on NVCC platform only. EXCLUDE_HIP_PLATFORM: This can be used to exclude a test case from HCC, NVCC or both platforms. -EXCLUDE_HIP_RUNTIME: This can be used to exclude a test case from HCC or VDI runtime. +EXCLUDE_HIP_RUNTIME: This can be used to exclude a test case from HCC or ROCclr runtime. EXCLUDE_HIP_COMPILER: This can be used to exclude a test case from hcc or clang compiler. EXCLUDE_HIP_RUNTIME AND EXCLUDE_HIP_COMPILER: when both options are specified it excludes test case from particular runtime and compiler. DEPENDS: This can be used to specify dependencies that need to be built before building the current target. @@ -66,7 +66,7 @@ DEPENDS: This can be used to specify dependencies that need to be built before b The supported syntax for the BUILD_CMD command is: ``` -BUILD_CMD: EXCLUDE_HIP_PLATFORM EXCLUDE_HIP_RUNTIME EXCLUDE_HIP_COMPILER DEPENDS +BUILD_CMD: EXCLUDE_HIP_PLATFORM EXCLUDE_HIP_RUNTIME EXCLUDE_HIP_COMPILER DEPENDS ``` %s: refers to current source file name. Additional source files needed for the test can be specified by name (including relative path). %t: refers to target executable named derived by removing the extension from the current source file. Alternatively a target executable name can be specified. @@ -77,7 +77,7 @@ BUILD_CMD: EXCLUDE_HIP_PLATFORM EXCL %S: refers to path to current source file. %T: refers to path to current build target. EXCLUDE_HIP_PLATFORM: This can be used to exclude a test case from HCC, NVCC or both platforms. -EXCLUDE_HIP_RUNTIME: This can be used to exclude a test case from HCC or VDI runtime. +EXCLUDE_HIP_RUNTIME: This can be used to exclude a test case from HCC or ROCclr runtime. EXCLUDE_HIP_COMPILER: This can be used to exclude a test case from hcc or clang compiler. EXCLUDE_HIP_RUNTIME AND EXCLUDE_HIP_COMPILER: when both options are specified it excludes test from particular runtime and compiler. DEPENDS: This can be used to specify dependencies that need to be built before building the current target. @@ -87,11 +87,11 @@ DEPENDS: This can be used to specify dependencies that need to be built before b The supported syntax for the TEST command is: ``` -TEST: %t EXCLUDE_HIP_PLATFORM EXCLUDE_HIP_RUNTIME EXCLUDE_HIP_COMPILER +TEST: %t EXCLUDE_HIP_PLATFORM EXCLUDE_HIP_RUNTIME EXCLUDE_HIP_COMPILER ``` %t: refers to target executable named derived by removing the extension from the current source file. Alternatively a target executable name can be specified. EXCLUDE_HIP_PLATFORM: This can be used to exclude a test case from HCC, NVCC or both platforms. -EXCLUDE_HIP_RUNTIME: This can be used to exclude a test case from HCC or VDI runtime. +EXCLUDE_HIP_RUNTIME: This can be used to exclude a test case from HCC or ROCclr runtime. EXCLUDE_HIP_COMPILER: This can be used to exclude a test case from hcc or clang compiler. EXCLUDE_HIP_RUNTIME AND EXCLUDE_HIP_COMPILER: when both options are specified it excludes test from particular runtime and compiler. Note that if the test has been excluded for a specific platform/runtime/compiler in the BUILD command, it is automatically excluded from the TEST command as well for the sameplatform. @@ -100,7 +100,7 @@ Note that if the test has been excluded for a specific platform/runtime/compiler When using the TEST command, HIT will squash and append the arguments specified to the test executable name to generate the CMAKE test name. Sometimes we might want to specify a more descriptive name. The TEST_NAMED command is used for that. The supported syntax for the TEST_NAMED command is: ``` -TEST: %t CMAKE_TEST_NAME EXCLUDE_HIP_PLATFORM EXCLUDE_HIP_RUNTIME EXCLUDE_HIP_COMPILER +TEST: %t CMAKE_TEST_NAME EXCLUDE_HIP_PLATFORM EXCLUDE_HIP_RUNTIME EXCLUDE_HIP_COMPILER ``` diff --git a/tests/src/Negative/memory/hipMemory.cpp b/tests/src/Negative/memory/hipMemory.cpp index a71ee948f5..030df7e69c 100644 --- a/tests/src/Negative/memory/hipMemory.cpp +++ b/tests/src/Negative/memory/hipMemory.cpp @@ -18,7 +18,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM vdi + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM rocclr * TEST: %t * HIT_END */ diff --git a/tests/src/Negative/stream/hipStreamCreateWithFlags.cpp b/tests/src/Negative/stream/hipStreamCreateWithFlags.cpp index 6f0662b82d..9ef13eec5a 100644 --- a/tests/src/Negative/stream/hipStreamCreateWithFlags.cpp +++ b/tests/src/Negative/stream/hipStreamCreateWithFlags.cpp @@ -18,7 +18,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM vdi + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM rocclr * TEST: %t * HIT_END */ diff --git a/tests/src/dynamicLoading/complex_loading_behavior.cpp b/tests/src/dynamicLoading/complex_loading_behavior.cpp index 954fa7da35..8288e15723 100644 --- a/tests/src/dynamicLoading/complex_loading_behavior.cpp +++ b/tests/src/dynamicLoading/complex_loading_behavior.cpp @@ -22,7 +22,7 @@ THE SOFTWARE. /* HIT_START * BUILD_CMD: libfoo_amd %hc %S/%s -o libfoo.so -Xcompiler -fPIC -lpthread -shared -DTEST_SHARED_LIBRARY EXCLUDE_HIP_PLATFORM nvcc - * BUILD_CMD: libfoo_nvidia %hc %S/%s -o libfoo.so -Xcompiler -fPIC -lpthread -shared -DTEST_SHARED_LIBRARY EXCLUDE_HIP_PLATFORM hcc vdi + * BUILD_CMD: libfoo_nvidia %hc %S/%s -o libfoo.so -Xcompiler -fPIC -lpthread -shared -DTEST_SHARED_LIBRARY EXCLUDE_HIP_PLATFORM hcc rocclr * BUILD_CMD: %t %hc %S/%s -o %T/%t -ldl * TEST: %t * HIT_END diff --git a/tests/src/gcc/LaunchKernel.c b/tests/src/gcc/LaunchKernel.c index d2fc854510..1791d52d25 100644 --- a/tests/src/gcc/LaunchKernel.c +++ b/tests/src/gcc/LaunchKernel.c @@ -19,10 +19,10 @@ /* HIT_START - * BUILD_CMD: gpu.o %hc -I%hip-path/include -g -c %S/gpu.cpp -o %T/gpu.o EXCLUDE_HIP_PLATFORM nvcc vdi - * BUILD_CMD: launchkernel.o %hc -D__HIP_PLATFORM_HCC__ -g -I%hip-path/include -c %S/LaunchKernel.c -o %T/launchkernel.o EXCLUDE_HIP_PLATFORM nvcc vdi - * BUILD_CMD: LaunchKernel %hc %T/launchkernel.o %T/gpu.o -g -Wl,--rpath=%hip-path/lib %hip-path/lib/libhip_hcc.so -o %T/%t DEPENDS gpu.o launchkernel.o EXCLUDE_HIP_PLATFORM nvcc vdi - * TEST: %t EXCLUDE_HIP_PLATFORM nvcc vdi + * BUILD_CMD: gpu.o %hc -I%hip-path/include -g -c %S/gpu.cpp -o %T/gpu.o EXCLUDE_HIP_PLATFORM nvcc rocclr + * BUILD_CMD: launchkernel.o %hc -D__HIP_PLATFORM_HCC__ -g -I%hip-path/include -c %S/LaunchKernel.c -o %T/launchkernel.o EXCLUDE_HIP_PLATFORM nvcc rocclr + * BUILD_CMD: LaunchKernel %hc %T/launchkernel.o %T/gpu.o -g -Wl,--rpath=%hip-path/lib %hip-path/lib/libhip_hcc.so -o %T/%t DEPENDS gpu.o launchkernel.o EXCLUDE_HIP_PLATFORM nvcc rocclr + * TEST: %t EXCLUDE_HIP_PLATFORM nvcc rocclr * HIT_END */ diff --git a/tests/src/gcc/hipMalloc.c b/tests/src/gcc/hipMalloc.c index ebf163de28..f54071f907 100644 --- a/tests/src/gcc/hipMalloc.c +++ b/tests/src/gcc/hipMalloc.c @@ -18,10 +18,10 @@ * */ /* HIT_START - * BUILD_CMD: hipMalloc %cc -D__HIP_PLATFORM_NVCC__ -I%hip-path/include -I/usr/local/cuda/include %S/%s -o %T/hipMalloc_nv -L/usr/local/cuda/lib64 -lcudart EXCLUDE_HIP_PLATFORM hcc vdi - * BUILD_CMD: hipMalloc %cc -D__HIP_PLATFORM_HCC__ -I%hip-path/include %S/%s -Wl,--rpath=%hip-path/lib %hip-path/lib/libhip_hcc.so -o %T/hipMalloc_hcc EXCLUDE_HIP_PLATFORM nvcc vdi - * TEST: hipMalloc_nv EXCLUDE_HIP_PLATFORM hcc vdi - * TEST: hipMalloc_hcc EXCLUDE_HIP_PLATFORM nvcc vdi + * BUILD_CMD: hipMalloc %cc -D__HIP_PLATFORM_NVCC__ -I%hip-path/include -I/usr/local/cuda/include %S/%s -o %T/hipMalloc_nv -L/usr/local/cuda/lib64 -lcudart EXCLUDE_HIP_PLATFORM hcc rocclr + * BUILD_CMD: hipMalloc %cc -D__HIP_PLATFORM_HCC__ -I%hip-path/include %S/%s -Wl,--rpath=%hip-path/lib %hip-path/lib/libhip_hcc.so -o %T/hipMalloc_hcc EXCLUDE_HIP_PLATFORM nvcc rocclr + * TEST: hipMalloc_nv EXCLUDE_HIP_PLATFORM hcc rocclr + * TEST: hipMalloc_hcc EXCLUDE_HIP_PLATFORM nvcc rocclr * HIT_END */ diff --git a/tests/src/hiprtc/hiprtcGetLoweredName.cpp b/tests/src/hiprtc/hiprtcGetLoweredName.cpp index a63e13af64..407533fd19 100644 --- a/tests/src/hiprtc/hiprtcGetLoweredName.cpp +++ b/tests/src/hiprtc/hiprtcGetLoweredName.cpp @@ -20,7 +20,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../test_common.cpp LINK_OPTIONS hiprtc EXCLUDE_HIP_PLATFORM nvcc vdi + * BUILD: %t %s ../test_common.cpp LINK_OPTIONS hiprtc EXCLUDE_HIP_PLATFORM nvcc rocclr * TEST: %t * HIT_END */ diff --git a/tests/src/hiprtc/saxpy.cpp b/tests/src/hiprtc/saxpy.cpp index a08c1c2399..cb7e7cdb5f 100755 --- a/tests/src/hiprtc/saxpy.cpp +++ b/tests/src/hiprtc/saxpy.cpp @@ -20,7 +20,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../test_common.cpp LINK_OPTIONS hiprtc EXCLUDE_HIP_PLATFORM nvcc vdi + * BUILD: %t %s ../test_common.cpp LINK_OPTIONS hiprtc EXCLUDE_HIP_PLATFORM nvcc rocclr * TEST: %t * HIT_END */ diff --git a/tests/src/p2p/hipPeerToPeer_simple.cpp b/tests/src/p2p/hipPeerToPeer_simple.cpp index 90e7112356..9f0982f353 100644 --- a/tests/src/p2p/hipPeerToPeer_simple.cpp +++ b/tests/src/p2p/hipPeerToPeer_simple.cpp @@ -24,9 +24,9 @@ THE SOFTWARE. /* HIT_START * BUILD: %t %s ../test_common.cpp - * TEST: %t EXCLUDE_HIP_PLATFORM hcc vdi - * TEST: %t --memcpyWithPeer EXCLUDE_HIP_PLATFORM hcc vdi - * TEST: %t --mirrorPeers EXCLUDE_HIP_PLATFORM hcc vdi + * TEST: %t EXCLUDE_HIP_PLATFORM hcc rocclr + * TEST: %t --memcpyWithPeer EXCLUDE_HIP_PLATFORM hcc rocclr + * TEST: %t --mirrorPeers EXCLUDE_HIP_PLATFORM hcc rocclr * HIT_END */ diff --git a/tests/src/runtimeApi/event/hipEventElapsedTime.cpp b/tests/src/runtimeApi/event/hipEventElapsedTime.cpp index 61afaa93d3..afb1802194 100644 --- a/tests/src/runtimeApi/event/hipEventElapsedTime.cpp +++ b/tests/src/runtimeApi/event/hipEventElapsedTime.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 EXCLUDE_HIP_PLATFORM vdi + * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 EXCLUDE_HIP_PLATFORM rocclr * TEST: %t * HIT_END */ diff --git a/tests/src/runtimeApi/event/hipEventIpc.cpp b/tests/src/runtimeApi/event/hipEventIpc.cpp index dd6c23e334..bec87ee725 100644 --- a/tests/src/runtimeApi/event/hipEventIpc.cpp +++ b/tests/src/runtimeApi/event/hipEventIpc.cpp @@ -24,7 +24,7 @@ THE SOFTWARE. // forces synchronization : set /* HIT_START - * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc vdi + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc rocclr * TEST: %t --iterations 10 * HIT_END */ diff --git a/tests/src/runtimeApi/memory/hipMemcpyNegetiveTests.cpp b/tests/src/runtimeApi/memory/hipMemcpyNegetiveTests.cpp index febc664f7d..ceaa8ed0d9 100644 --- a/tests/src/runtimeApi/memory/hipMemcpyNegetiveTests.cpp +++ b/tests/src/runtimeApi/memory/hipMemcpyNegetiveTests.cpp @@ -18,7 +18,7 @@ * */ /* HIT_START - * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc vdi + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc rocclr * TEST: %t * HIT_END */ diff --git a/tests/src/runtimeApi/memory/p2p_copy_coherency.cpp b/tests/src/runtimeApi/memory/p2p_copy_coherency.cpp index adface243d..22bd683631 100644 --- a/tests/src/runtimeApi/memory/p2p_copy_coherency.cpp +++ b/tests/src/runtimeApi/memory/p2p_copy_coherency.cpp @@ -34,7 +34,7 @@ THE SOFTWARE. #define USE_HCC_MEMTRACKER 0 /* Debug flag to show the memtracker periodically */ -#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_VDI__) +#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_ROCclr__) #include #else #define USE_HCC_MEMTRACKER 0 diff --git a/tests/src/runtimeApi/module/hipExtModuleLaunchKernel.cpp b/tests/src/runtimeApi/module/hipExtModuleLaunchKernel.cpp index a26c9be4a0..b9327a9c58 100755 --- a/tests/src/runtimeApi/module/hipExtModuleLaunchKernel.cpp +++ b/tests/src/runtimeApi/module/hipExtModuleLaunchKernel.cpp @@ -19,7 +19,7 @@ THE SOFTWARE. /* HIT_START * BUILD_CMD: matmul.code %hc --genco %S/matmul.cpp -o matmul.code EXCLUDE_HIP_PLATFORM nvcc - * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc vdi + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc rocclr * TEST: %t * HIT_END */ diff --git a/tests/src/runtimeApi/module/hipModuleLoadDataMultThreaded.cpp b/tests/src/runtimeApi/module/hipModuleLoadDataMultThreaded.cpp index 11bd6e7d50..eef367ab70 100644 --- a/tests/src/runtimeApi/module/hipModuleLoadDataMultThreaded.cpp +++ b/tests/src/runtimeApi/module/hipModuleLoadDataMultThreaded.cpp @@ -18,7 +18,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 EXCLUDE_HIP_PLATFORM vdi + * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 EXCLUDE_HIP_PLATFORM rocclr * TEST: %t * HIT_END */ diff --git a/tests/src/runtimeApi/module/hipModuleTexture2dDrv.cpp b/tests/src/runtimeApi/module/hipModuleTexture2dDrv.cpp index e7c254e9fd..9318d168a0 100644 --- a/tests/src/runtimeApi/module/hipModuleTexture2dDrv.cpp +++ b/tests/src/runtimeApi/module/hipModuleTexture2dDrv.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc vdi + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc rocclr * TEST: %t * HIT_END */ diff --git a/tests/src/runtimeApi/module/tex2d_kernel.cpp b/tests/src/runtimeApi/module/tex2d_kernel.cpp index e744d88776..af5010a221 100644 --- a/tests/src/runtimeApi/module/tex2d_kernel.cpp +++ b/tests/src/runtimeApi/module/tex2d_kernel.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD_CMD: tex2d_kernel.code %hc --genco %S/tex2d_kernel.cpp -o tex2d_kernel.code EXCLUDE_HIP_PLATFORM vdi + * BUILD_CMD: tex2d_kernel.code %hc --genco %S/tex2d_kernel.cpp -o tex2d_kernel.code EXCLUDE_HIP_PLATFORM rocclr * HIT_END */ diff --git a/tests/src/runtimeApi/occupancy/hipOccupancyMaxActiveBlocksPerMultiprocessor.cpp b/tests/src/runtimeApi/occupancy/hipOccupancyMaxActiveBlocksPerMultiprocessor.cpp index d8385669ea..ad37dac8bc 100644 --- a/tests/src/runtimeApi/occupancy/hipOccupancyMaxActiveBlocksPerMultiprocessor.cpp +++ b/tests/src/runtimeApi/occupancy/hipOccupancyMaxActiveBlocksPerMultiprocessor.cpp @@ -22,7 +22,7 @@ THE SOFTWARE. // Test the Grid_Launch syntax. /* HIT_START - * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM vdi + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM rocclr * TEST: %t * HIT_END */ diff --git a/tests/src/runtimeApi/stream/hipStreamAddCallbackCatch.cpp b/tests/src/runtimeApi/stream/hipStreamAddCallbackCatch.cpp index c22b390ecc..47c2e9fe9c 100644 --- a/tests/src/runtimeApi/stream/hipStreamAddCallbackCatch.cpp +++ b/tests/src/runtimeApi/stream/hipStreamAddCallbackCatch.cpp @@ -11,7 +11,7 @@ #include "test_common.h" /* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 EXCLUDE_HIP_PLATFORM vdi + * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 EXCLUDE_HIP_PLATFORM rocclr * TEST: %t * HIT_END */ diff --git a/tests/src/surface/hipSurfaceObj2D.cpp b/tests/src/surface/hipSurfaceObj2D.cpp index 2724604279..cc154fb65f 100644 --- a/tests/src/surface/hipSurfaceObj2D.cpp +++ b/tests/src/surface/hipSurfaceObj2D.cpp @@ -1,5 +1,5 @@ /* HIT_START - * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM vdi + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM rocclr * TEST: %t * HIT_END */ diff --git a/tests/src/texture/hipBindTex2DPitch.cpp b/tests/src/texture/hipBindTex2DPitch.cpp index 6cee22a45d..2fd3f1228d 100644 --- a/tests/src/texture/hipBindTex2DPitch.cpp +++ b/tests/src/texture/hipBindTex2DPitch.cpp @@ -18,7 +18,7 @@ THE SOFTWARE. */ /*HIT_START - * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM vdi + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM rocclr * TEST: %t * HIT_END */ diff --git a/tests/src/texture/hipBindTexRef1DFetch.cpp b/tests/src/texture/hipBindTexRef1DFetch.cpp index af79153fe0..eeaf42129f 100644 --- a/tests/src/texture/hipBindTexRef1DFetch.cpp +++ b/tests/src/texture/hipBindTexRef1DFetch.cpp @@ -22,7 +22,7 @@ THE SOFTWARE. /* HIT_START - * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM vdi + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM rocclr * TEST: %t * HIT_END */ diff --git a/tests/src/texture/hipNormalizedFloatValueTex.cpp b/tests/src/texture/hipNormalizedFloatValueTex.cpp index b4aa3e9c05..95b7c1d879 100644 --- a/tests/src/texture/hipNormalizedFloatValueTex.cpp +++ b/tests/src/texture/hipNormalizedFloatValueTex.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc hcc vdi + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc hcc rocclr * TEST: %t * HIT_END */ diff --git a/tests/src/texture/hipTex1DFetchCheckModes.cpp b/tests/src/texture/hipTex1DFetchCheckModes.cpp index 381d07280c..5a8ff959fa 100644 --- a/tests/src/texture/hipTex1DFetchCheckModes.cpp +++ b/tests/src/texture/hipTex1DFetchCheckModes.cpp @@ -18,7 +18,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM vdi + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM rocclr * TEST: %t * HIT_END */ diff --git a/tests/src/texture/hipTextureRef2D.cpp b/tests/src/texture/hipTextureRef2D.cpp index 5247f81fe0..4f00260998 100644 --- a/tests/src/texture/hipTextureRef2D.cpp +++ b/tests/src/texture/hipTextureRef2D.cpp @@ -1,5 +1,5 @@ /* HIT_START - * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM vdi + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM rocclr * TEST: %t * HIT_END */ diff --git a/tests/src/texture/simpleTexture2DLayered.cpp b/tests/src/texture/simpleTexture2DLayered.cpp index 8b1bbb64a3..1424976716 100644 --- a/tests/src/texture/simpleTexture2DLayered.cpp +++ b/tests/src/texture/simpleTexture2DLayered.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM vdi + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM rocclr * TEST: %t * HIT_END */ diff --git a/tests/src/texture/simpleTexture3D.cpp b/tests/src/texture/simpleTexture3D.cpp index 82f6cf5e99..bd342df1c3 100644 --- a/tests/src/texture/simpleTexture3D.cpp +++ b/tests/src/texture/simpleTexture3D.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc vdi + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc rocclr * TEST: %t * HIT_END */ From 6f86a740185e3b280b4a7e5e83f3ad7b94371f47 Mon Sep 17 00:00:00 2001 From: Aakash Sudhanwa Date: Wed, 6 May 2020 07:01:58 -0700 Subject: [PATCH 24/33] rocclr/CMakeLists.txt: Fixing unresolved merge conflict Change-Id: I09b9a955b0f1ece8d278112a22bc8aef31b57258 Signed-off-by: Aakash Sudhanwa --- rocclr/CMakeLists.txt | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/rocclr/CMakeLists.txt b/rocclr/CMakeLists.txt index ef30ddb8aa..5158b7935e 100644 --- a/rocclr/CMakeLists.txt +++ b/rocclr/CMakeLists.txt @@ -195,17 +195,11 @@ target_link_libraries(device INTERFACE host) # TODO: we may create host_static and device_static to let app # link amdhip64_static -<<<<<<< HEAD:vdi/CMakeLists.txt # FIXME: Linux convention is to create static library with same base # filename. -target_link_libraries(amdhip64 PRIVATE amdvdi_static Threads::Threads dl) +target_link_libraries(amdhip64 PRIVATE amdrocclr_static Threads::Threads dl) target_link_libraries(amdhip64_static PRIVATE Threads::Threads dl) target_link_libraries(amdhip64_static_temp PRIVATE Threads::Threads dl) -======= -target_link_libraries(amdhip64 PRIVATE amdrocclr_static pthread dl) -target_link_libraries(amdhip64_static PRIVATE pthread dl) -target_link_libraries(amdhip64_static_temp PRIVATE pthread dl) ->>>>>>> bf7c645f... name change vdi to rocclr:rocclr/CMakeLists.txt # combine objects of vid and hip into amdhip64_static add_custom_target( From bb6922b8a333a7a1400243d098bfbcb44b22fcc5 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 1 May 2020 08:50:33 -0400 Subject: [PATCH 25/33] Don't use relative include path The header base path is already in the include set. Change-Id: Id83abaa921d720ae103e3281161b875fcf84493d --- lpl_ca/lpl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lpl_ca/lpl.hpp b/lpl_ca/lpl.hpp index 941f30123a..84a6930753 100644 --- a/lpl_ca/lpl.hpp +++ b/lpl_ca/lpl.hpp @@ -4,7 +4,7 @@ #include "clara/clara.hpp" #include "pstreams/pstream.h" -#include "../include/hip/hcc_detail/elfio/elfio.hpp" +#include "hip/hcc_detail/elfio/elfio.hpp" #include From f0b49d05601e06b327b79e242d7a8d84823191c0 Mon Sep 17 00:00:00 2001 From: Christophe Paquot Date: Wed, 6 May 2020 10:36:53 -0700 Subject: [PATCH 26/33] Skip iHipWaitActiveStreams if stream is NonBlocking SWDEV-234484 Change-Id: I279bdc8485d8218e0aaa89d094c08a84f002a608 --- rocclr/hip_context.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rocclr/hip_context.cpp b/rocclr/hip_context.cpp index 2f75d07b8a..2d0abc6add 100755 --- a/rocclr/hip_context.cpp +++ b/rocclr/hip_context.cpp @@ -84,7 +84,9 @@ amd::HostQueue* getQueue(hipStream_t stream) { } else { constexpr bool WaitNullStreamOnly = true; amd::HostQueue* queue = reinterpret_cast(stream)->asHostQueue(); - iHipWaitActiveStreams(queue, WaitNullStreamOnly); + if (!(reinterpret_cast(stream)->Flags() & hipStreamNonBlocking)) { + iHipWaitActiveStreams(queue, WaitNullStreamOnly); + } return queue; } } From 6e972dd3bb94631541805b0795f286a9cf0c1339 Mon Sep 17 00:00:00 2001 From: Tao Sang Date: Mon, 4 May 2020 18:06:34 -0400 Subject: [PATCH 27/33] Support performance tests Support performance tests while direct tests commands keep unchanged. To build performance tests, run "make build_perf". To run all performance testis, run "make perf". To run specific tests, for example, run /usr/bin/ctest -C performance -R performance_tests/perfDispatch --verbose To run individual test, for example, run performance_tests/memory/hipPerfMemMallocCpyFree Change-Id: I168c1b9ef1ec21b392d48648d0c71e8fbd37d57b --- CMakeLists.txt | 16 +++++++- tests/README.md | 22 +++++++++++ tests/hip_tests.txt | 2 +- tests/hit/HIT.cmake | 37 ++++++++++++------- .../memory/hipPerfMemMallocCpyFree.cpp | 2 +- .../hipPerfBufferCopyRectSpeed.cpp | 2 +- .../perfDispatch/hipPerfBufferCopySpeed.cpp | 2 +- .../perfDispatch/hipPerfDispatchSpeed.cpp | 2 +- .../perfDispatch/timer.cpp | 0 .../perfDispatch/timer.h | 0 10 files changed, 65 insertions(+), 20 deletions(-) rename tests/{src/Performance => performance}/memory/hipPerfMemMallocCpyFree.cpp (98%) rename tests/{src/Performance => performance}/perfDispatch/hipPerfBufferCopyRectSpeed.cpp (99%) rename tests/{src/Performance => performance}/perfDispatch/hipPerfBufferCopySpeed.cpp (99%) rename tests/{src/Performance => performance}/perfDispatch/hipPerfDispatchSpeed.cpp (98%) rename tests/{src/Performance => performance}/perfDispatch/timer.cpp (100%) rename tests/{src/Performance => performance}/perfDispatch/timer.h (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 28f9daf015..db72c8dd75 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -648,15 +648,27 @@ if(${RUN_HIT} EQUAL 0) # Add tests include_directories(${HIP_SRC_PATH}/tests/src) - hit_add_directory_recursive(${HIP_SRC_PATH}/tests/src "directed_tests") + hit_add_directory_recursive(${HIP_CTEST_CONFIG_DEFAULT} ${HIP_SRC_PATH}/tests/src "directed_tests") # Add unit tests include_directories(${HIP_SRC_PATH}/tests/unit) - hit_add_directory_recursive(${HIP_SRC_PATH}/tests/unit "unit_tests") + hit_add_directory_recursive(${HIP_CTEST_CONFIG_DEFAULT} ${HIP_SRC_PATH}/tests/unit "unit_tests") + + # Add performance tests + include_directories(${HIP_SRC_PATH}/tests/performance) + hit_add_directory_recursive(${HIP_CTEST_CONFIG_PERFORMANCE} ${HIP_SRC_PATH}/tests/performance "performance_tests") # Add top-level tests to build_tests add_custom_target(build_tests DEPENDS directed_tests unit_tests) + # Add top-level tests to build performance_tests. + # To build performance tests, just run "make build_perf" + add_custom_target(build_perf DEPENDS performance_tests) + + # Add custom target: perf. + # To run performance tests, just run "make perf" + add_custom_target(perf COMMAND "${CMAKE_CTEST_COMMAND}" -C "${HIP_CTEST_CONFIG_PERFORMANCE}" -R "performance_tests/" --verbose) + # Add custom target: check add_custom_target(check COMMAND "${CMAKE_COMMAND}" --build . --target test DEPENDS build_tests) else() diff --git a/tests/README.md b/tests/README.md index 7e82f66cd6..275a25f9dd 100644 --- a/tests/README.md +++ b/tests/README.md @@ -121,6 +121,28 @@ ctest -R Memcpy ctest -R memory ``` +### Performance tests: +``` +Above tests are direct tests which are majorly used for function verification. +We also provide performance tests under tests/performance folder. + +# Build all performance tests after running "make install" under build folder: +make build_perf + +Then all performance test applications will be built into ./performance_tests folder. + +# Run all performance tests: +make perf + +# Run individual performance test: +For example, +performance_tests/memory/hipPerfMemMallocCpyFree + +# Run a specific test set: +For example, +/usr/bin/ctest -C performance -R performance_tests/perfDispatch --verbose +Here "-C performance" indicate the "performance" configuration of ctest. +``` ### If a test fails - how to debug a test diff --git a/tests/hip_tests.txt b/tests/hip_tests.txt index 67a4238520..fd08932557 100644 --- a/tests/hip_tests.txt +++ b/tests/hip_tests.txt @@ -10,4 +10,4 @@ include(${HIP_SRC_PATH}/tests/hit/HIT.cmake) # Add tests include_directories(${HIP_SRC_PATH}/tests/src) -hit_add_directory_recursive(${HIP_SRC_PATH}/tests/src "directed_tests") +hit_add_directory_recursive(${HIP_CTEST_CONFIG_DEFAULT} ${HIP_SRC_PATH}/tests/src "directed_tests") diff --git a/tests/hit/HIT.cmake b/tests/hit/HIT.cmake index be89721ce3..1677d93a20 100644 --- a/tests/hit/HIT.cmake +++ b/tests/hit/HIT.cmake @@ -1,6 +1,9 @@ include(CTest) find_package(HIP REQUIRED) +set(HIP_CTEST_CONFIG_DEFAULT "default") +set(HIP_CTEST_CONFIG_PERFORMANCE "performance") + #------------------------------------------------------------------------------- # Helper macro to parse BUILD instructions macro(PARSE_BUILD_COMMAND _target _sources _hipcc_options _hcc_options _clang_options _nvcc_options _link_options _exclude_platforms _exclude_runtime _exclude_compiler _depends _dir) @@ -291,22 +294,30 @@ macro(READ_FROM_MAP _map _key _value) endmacro() # Helper macro to create a test -macro(MAKE_TEST exe) +macro(MAKE_TEST _config exe) string(REPLACE " " "" smush_args ${ARGN}) set(testname ${exe}${smush_args}.tst) - add_test(NAME ${testname} COMMAND ${PROJECT_BINARY_DIR}/${exe} ${ARGN}) + if(${_config} STREQUAL ${HIP_CTEST_CONFIG_DEFAULT}) + add_test(NAME ${testname} COMMAND ${PROJECT_BINARY_DIR}/${exe} ${ARGN}) + else() + add_test(NAME ${testname} CONFIGURATIONS ${_config} COMMAND ${PROJECT_BINARY_DIR}/${exe} ${ARGN}) + endif() set_tests_properties(${testname} PROPERTIES PASS_REGULAR_EXPRESSION "PASSED" ENVIRONMENT HIP_PATH=${HIP_ROOT_DIR}) endmacro() -macro(MAKE_NAMED_TEST exe testname) - add_test(NAME ${testname} COMMAND ${PROJECT_BINARY_DIR}/${exe} ${ARGN}) +macro(MAKE_NAMED_TEST _config exe testname) + if(${_config} STREQUAL ${HIP_CTEST_CONFIG_DEFAULT}) + add_test(NAME ${testname} COMMAND ${PROJECT_BINARY_DIR}/${exe} ${ARGN}) + else() + add_test(NAME ${testname} CONFIGURATIONS ${_config} COMMAND ${PROJECT_BINARY_DIR}/${exe} ${ARGN}) + endif() set_tests_properties(${testname} PROPERTIES PASS_REGULAR_EXPRESSION "PASSED" ENVIRONMENT HIP_PATH=${HIP_ROOT_DIR}) endmacro() #------------------------------------------------------------------------------- # Macro: HIT_ADD_FILES used to scan+add multiple files for testing. file(GLOB HIP_LIB_FILES ${HIP_PATH}/lib/*) -macro(HIT_ADD_FILES _dir _label _parent) +macro(HIT_ADD_FILES _config _dir _label _parent) foreach (file ${ARGN}) # Build tests execute_process(COMMAND ${HIP_SRC_PATH}/tests/hit/parser --buildCMDs ${file} @@ -398,7 +409,7 @@ macro(HIT_ADD_FILES _dir _label _parent) elseif(${HIP_RUNTIME} IN_LIST _exclude_runtime AND ${HIP_COMPILER} IN_LIST _exclude_compiler) elseif(_exclude_test_from_build STREQUAL TRUE) else() - make_test(${_label}/${_target} ${_arguments}) + make_test(${_config} ${_label}/${_target} ${_arguments}) endif() endforeach() @@ -420,7 +431,7 @@ macro(HIT_ADD_FILES _dir _label _parent) elseif(${HIP_RUNTIME} IN_LIST _exclude_runtime AND ${HIP_COMPILER} IN_LIST _exclude_compiler) elseif(_exclude_test_from_build STREQUAL TRUE) else() - make_named_test(${_label}/${_target} ${_label}/${_testname}.tst ${_arguments}) + make_named_test(${_config} ${_label}/${_target} ${_label}/${_testname}.tst ${_arguments}) endif() endforeach() endforeach() @@ -432,16 +443,16 @@ macro(HIT_ADD_DIRECTORY _dir _label) string(REGEX REPLACE "/" "." _parent ${_label}) add_custom_target(${_parent}) file(GLOB files "${_dir}/*.c*") - hit_add_files(${_dir} ${_label} ${parent} ${files}) + hit_add_files(${HIP_CTEST_CONFIG_DEFAULT} ${_dir} ${_label} ${parent} ${files}) endmacro() # Macro: HIT_ADD_DIRECTORY_RECURSIVE to scan+add all files in a directory+subdirectories for testing -macro(HIT_ADD_DIRECTORY_RECURSIVE _dir _label) +macro(HIT_ADD_DIRECTORY_RECURSIVE _config _dir _label) execute_process(COMMAND ${CMAKE_COMMAND} -E make_directory ${_label} WORKING_DIRECTORY ${PROJECT_BINARY_DIR}) string(REGEX REPLACE "/" "." _parent ${_label}) add_custom_target(${_parent}) - if(${ARGC} EQUAL 3) - add_dependencies(${ARGV2} ${_parent}) + if(${ARGC} EQUAL 4) + add_dependencies(${ARGV3} ${_parent}) endif() file(GLOB children RELATIVE ${_dir} ${_dir}/*) set(dirlist "") @@ -449,12 +460,12 @@ macro(HIT_ADD_DIRECTORY_RECURSIVE _dir _label) if(IS_DIRECTORY ${_dir}/${child}) list(APPEND dirlist ${child}) else() - hit_add_files(${_dir} ${_label} ${_parent} ${child}) + hit_add_files(${_config} ${_dir} ${_label} ${_parent} ${child}) endif() endforeach() foreach(child ${dirlist}) string(REGEX REPLACE "/" "." _parent ${_label}) - hit_add_directory_recursive(${_dir}/${child} ${_label}/${child} ${_parent}) + hit_add_directory_recursive(${_config} ${_dir}/${child} ${_label}/${child} ${_parent}) endforeach() endmacro() diff --git a/tests/src/Performance/memory/hipPerfMemMallocCpyFree.cpp b/tests/performance/memory/hipPerfMemMallocCpyFree.cpp similarity index 98% rename from tests/src/Performance/memory/hipPerfMemMallocCpyFree.cpp rename to tests/performance/memory/hipPerfMemMallocCpyFree.cpp index f059a564a1..d58fdb381e 100644 --- a/tests/src/Performance/memory/hipPerfMemMallocCpyFree.cpp +++ b/tests/performance/memory/hipPerfMemMallocCpyFree.cpp @@ -22,7 +22,7 @@ THE SOFTWARE. #include /* HIT_START - * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc + * BUILD: %t %s ../../src/test_common.cpp EXCLUDE_HIP_PLATFORM nvcc * TEST: %t * HIT_END */ diff --git a/tests/src/Performance/perfDispatch/hipPerfBufferCopyRectSpeed.cpp b/tests/performance/perfDispatch/hipPerfBufferCopyRectSpeed.cpp similarity index 99% rename from tests/src/Performance/perfDispatch/hipPerfBufferCopyRectSpeed.cpp rename to tests/performance/perfDispatch/hipPerfBufferCopyRectSpeed.cpp index 71d8ebbe0a..5000904af9 100644 --- a/tests/src/Performance/perfDispatch/hipPerfBufferCopyRectSpeed.cpp +++ b/tests/performance/perfDispatch/hipPerfBufferCopyRectSpeed.cpp @@ -7,7 +7,7 @@ #include "test_common.h" /* HIT_START - * BUILD: %t %s ../../test_common.cpp timer.cpp EXCLUDE_HIP_PLATFORM nvcc + * BUILD: %t %s ../../src/test_common.cpp timer.cpp EXCLUDE_HIP_PLATFORM nvcc * TEST: %t * HIT_END */ diff --git a/tests/src/Performance/perfDispatch/hipPerfBufferCopySpeed.cpp b/tests/performance/perfDispatch/hipPerfBufferCopySpeed.cpp similarity index 99% rename from tests/src/Performance/perfDispatch/hipPerfBufferCopySpeed.cpp rename to tests/performance/perfDispatch/hipPerfBufferCopySpeed.cpp index 239d47b347..6f284ae7fb 100644 --- a/tests/src/Performance/perfDispatch/hipPerfBufferCopySpeed.cpp +++ b/tests/performance/perfDispatch/hipPerfBufferCopySpeed.cpp @@ -7,7 +7,7 @@ #include "test_common.h" /* HIT_START - * BUILD: %t %s ../../test_common.cpp timer.cpp EXCLUDE_HIP_PLATFORM nvcc + * BUILD: %t %s ../../src/test_common.cpp timer.cpp EXCLUDE_HIP_PLATFORM nvcc * TEST: %t * HIT_END */ diff --git a/tests/src/Performance/perfDispatch/hipPerfDispatchSpeed.cpp b/tests/performance/perfDispatch/hipPerfDispatchSpeed.cpp similarity index 98% rename from tests/src/Performance/perfDispatch/hipPerfDispatchSpeed.cpp rename to tests/performance/perfDispatch/hipPerfDispatchSpeed.cpp index 3d14c7b95f..84ba73c3aa 100644 --- a/tests/src/Performance/perfDispatch/hipPerfDispatchSpeed.cpp +++ b/tests/performance/perfDispatch/hipPerfDispatchSpeed.cpp @@ -7,7 +7,7 @@ #include "test_common.h" /* HIT_START - * BUILD: %t %s ../../test_common.cpp timer.cpp EXCLUDE_HIP_PLATFORM nvcc + * BUILD: %t %s ../../src/test_common.cpp timer.cpp EXCLUDE_HIP_PLATFORM nvcc * TEST: %t * HIT_END */ diff --git a/tests/src/Performance/perfDispatch/timer.cpp b/tests/performance/perfDispatch/timer.cpp similarity index 100% rename from tests/src/Performance/perfDispatch/timer.cpp rename to tests/performance/perfDispatch/timer.cpp diff --git a/tests/src/Performance/perfDispatch/timer.h b/tests/performance/perfDispatch/timer.h similarity index 100% rename from tests/src/Performance/perfDispatch/timer.h rename to tests/performance/perfDispatch/timer.h From 5e91bee221c89d80df2f4bdef844836a30640e06 Mon Sep 17 00:00:00 2001 From: kjayapra-amd Date: Wed, 6 May 2020 14:22:03 -0400 Subject: [PATCH 28/33] SWDEV-232464 - Need to initialize image with ptr passed since they can pass image not of type __ClangOffloadBundler. Change-Id: I2c50042220a0230bc445ed21728f114a229c53e1 --- rocclr/hip_module.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rocclr/hip_module.cpp b/rocclr/hip_module.cpp index a09ac63c3c..db39b234b4 100755 --- a/rocclr/hip_module.cpp +++ b/rocclr/hip_module.cpp @@ -217,7 +217,8 @@ inline bool ihipModuleRegisterGlobal(amd::Program* program, hipModule_t* module) hipError_t ihipModuleLoadData(hipModule_t* module, const void* mmap_ptr, size_t mmap_size) { - const void* image = nullptr; + /* initialize image it to the mmap_ptr, if this is of no_clang_offload bundle then they directly pass the image */ + const void* image = mmap_ptr; std::vector> code_objs; hipError_t code_obj_err = __hipExtractCodeObjectFromFatBinary(mmap_ptr, {hip::getCurrentDevice()->devices()[0]->info().name_}, code_objs); From 56691fe7e6425242d8fba1fa2a9c571369be8979 Mon Sep 17 00:00:00 2001 From: agodavar Date: Tue, 5 May 2020 05:13:03 -0400 Subject: [PATCH 29/33] SWDEV-233749 Added support for parallel build and link Change-Id: Id227ea1fe7574612c33a8e6d91fc59a29490a35f --- cmake/FindHIP.cmake | 30 +++++++++++++++---- cmake/FindHIP/run_hipcc.cmake | 3 +- hip-config.cmake.in | 20 +++++++++++++ .../CMakeLists.txt | 1 + 4 files changed, 48 insertions(+), 6 deletions(-) diff --git a/cmake/FindHIP.cmake b/cmake/FindHIP.cmake index 2331a31347..cc7f4af20c 100644 --- a/cmake/FindHIP.cmake +++ b/cmake/FindHIP.cmake @@ -1,7 +1,7 @@ ############################################################################### # FindHIP.cmake ############################################################################### - +include(CheckCXXCompilerFlag) ############################################################################### # SET: Variable defaults ############################################################################### @@ -202,16 +202,36 @@ set(CMAKE_SHARED_LIBRARY_RUNTIME_HIP_FLAG_SEP ${CMAKE_SHARED_LIBRARY_RUNTIME_CXX set(CMAKE_SHARED_LIBRARY_LINK_STATIC_HIP_FLAGS ${CMAKE_SHARED_LIBRARY_LINK_STATIC_CXX_FLAGS}) set(CMAKE_SHARED_LIBRARY_LINK_DYNAMIC_HIP_FLAGS ${CMAKE_SHARED_LIBRARY_LINK_DYNAMIC_CXX_FLAGS}) +set(HIP_CLANG_PARALLEL_BUILD_COMPILE_OPTIONS "") +set(HIP_CLANG_PARALLEL_BUILD_LINK_OPTIONS "") + if("${HIP_COMPILER}" STREQUAL "hcc") # Set the CMake Flags to use the HCC Compiler. set(CMAKE_HIP_CREATE_SHARED_LIBRARY "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_HOME} -o ") set(CMAKE_HIP_CREATE_SHARED_MODULE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_HOME} -o -shared" ) set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_HOME} -o ") elseif("${HIP_COMPILER}" STREQUAL "clang") + #Number of parallel jobs by default is 1 + if(NOT DEFINED HIP_CLANG_NUM_PARALLEL_JOBS) + set(HIP_CLANG_NUM_PARALLEL_JOBS 1) + endif() + #Add support for parallel build and link + if(${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang") + check_cxx_compiler_flag("-parallel-jobs=1" HIP_CLANG_SUPPORTS_PARALLEL_JOBS) + endif() + if(HIP_CLANG_NUM_PARALLEL_JOBS GREATER 1) + if(${HIP_CLANG_SUPPORTS_PARALLEL_JOBS}) + set(HIP_CLANG_PARALLEL_BUILD_COMPILE_OPTIONS "-parallel-jobs=${HIP_CLANG_NUM_PARALLEL_JOBS} -Wno-format-nonliteral") + set(HIP_CLANG_PARALLEL_BUILD_LINK_OPTIONS "-parallel-jobs=${HIP_CLANG_NUM_PARALLEL_JOBS}") + else() + message("clang compiler doesn't support parallel jobs") + endif() + endif() + # Set the CMake Flags to use the HIP-Clang Compiler. - set(CMAKE_HIP_CREATE_SHARED_LIBRARY "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HIP_CLANG_PATH} -o ") - set(CMAKE_HIP_CREATE_SHARED_MODULE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HIP_CLANG_PATH} -o -shared" ) - set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HIP_CLANG_PATH} -o ") + set(CMAKE_HIP_CREATE_SHARED_LIBRARY "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HIP_CLANG_PATH} ${HIP_CLANG_PARALLEL_BUILD_LINK_OPTIONS} -o ") + set(CMAKE_HIP_CREATE_SHARED_MODULE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HIP_CLANG_PATH} ${HIP_CLANG_PARALLEL_BUILD_LINK_OPTIONS} -o -shared" ) + set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HIP_CLANG_PATH} ${HIP_CLANG_PARALLEL_BUILD_LINK_OPTIONS} -o ") endif() ############################################################################### @@ -614,7 +634,7 @@ macro(HIP_ADD_EXECUTABLE hip_target) set(HIP_CLANG_PATH "/opt/rocm/llvm/bin") endif() endif() - set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HIP_CLANG_PATH} -o ") + set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HIP_CLANG_PATH} ${HIP_CLANG_PARALLEL_BUILD_LINK_OPTIONS} -o ") endif() add_executable(${hip_target} ${_cmake_options} ${_generated_files} ${_sources}) set_target_properties(${hip_target} PROPERTIES LINKER_LANGUAGE HIP) diff --git a/cmake/FindHIP/run_hipcc.cmake b/cmake/FindHIP/run_hipcc.cmake index 01add26bcf..24b754c874 100644 --- a/cmake/FindHIP/run_hipcc.cmake +++ b/cmake/FindHIP/run_hipcc.cmake @@ -28,6 +28,7 @@ set(CMAKE_COMMAND "@CMAKE_COMMAND@") # path set(HIP_run_make2cmake "@HIP_run_make2cmake@") # path set(HCC_HOME "@HCC_HOME@") #path set(HIP_CLANG_PATH "@HIP_CLANG_PATH@") #path +set(HIP_CLANG_PARALLEL_BUILD_COMPILE_OPTIONS "@HIP_CLANG_PARALLEL_BUILD_COMPILE_OPTIONS@") @HIP_HOST_FLAGS@ @_HIP_HIPCC_FLAGS@ @@ -56,7 +57,7 @@ if(NOT host_flag) if(NOT "x${HIP_CLANG_PATH}" STREQUAL "x") set(ENV{HIP_CLANG_PATH} ${HIP_CLANG_PATH}) endif() - set(__CC_FLAGS ${HIP_HIPCC_FLAGS} ${HIP_CLANG_FLAGS} ${HIP_HIPCC_FLAGS_${build_configuration}} ${HIP_CLANG_FLAGS_${build_configuration}}) + set(__CC_FLAGS ${HIP_CLANG_PARALLEL_BUILD_COMPILE_OPTIONS} ${HIP_HIPCC_FLAGS} ${HIP_CLANG_FLAGS} ${HIP_HIPCC_FLAGS_${build_configuration}} ${HIP_CLANG_FLAGS_${build_configuration}}) endif() else() set(__CC_FLAGS ${HIP_HIPCC_FLAGS} ${HIP_NVCC_FLAGS} ${HIP_HIPCC_FLAGS_${build_configuration}} ${HIP_NVCC_FLAGS_${build_configuration}}) diff --git a/hip-config.cmake.in b/hip-config.cmake.in index 9ff5832cd9..9b5517d1f0 100644 --- a/hip-config.cmake.in +++ b/hip-config.cmake.in @@ -39,6 +39,10 @@ if (NOT _CMakeFindDependencyMacro_FOUND) endmacro() endif() +#Number of parallel jobs by default is 1 +if(NOT DEFINED HIP_CLANG_NUM_PARALLEL_JOBS) + set(HIP_CLANG_NUM_PARALLEL_JOBS 1) +endif() set(HIP_COMPILER "@HIP_COMPILER@") set(HIP_RUNTIME "@HIP_RUNTIME@") @@ -168,6 +172,22 @@ if(HIP_COMPILER STREQUAL "clang") INTERFACE_LINK_LIBRARIES "--cuda-gpu-arch=${GPU_TARGET}" ) endforeach() + #Add support for parallel build and link + if(${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang") + check_cxx_compiler_flag("-parallel-jobs=1" HIP_CLANG_SUPPORTS_PARALLEL_JOBS) + endif() + if(HIP_CLANG_NUM_PARALLEL_JOBS GREATER 1) + if(${HIP_CLANG_SUPPORTS_PARALLEL_JOBS} ) + set_property(TARGET hip::device APPEND PROPERTY + INTERFACE_COMPILE_OPTIONS -parallel-jobs=${HIP_CLANG_NUM_PARALLEL_JOBS} -Wno-format-nonliteral + ) + set_property(TARGET hip::device APPEND PROPERTY + INTERFACE_LINK_LIBRARIES -parallel-jobs=${HIP_CLANG_NUM_PARALLEL_JOBS} + ) + else() + message("clang compiler doesn't support parallel jobs") + endif() + endif() endif() set( hip_LIBRARIES hip::host hip::device) diff --git a/samples/2_Cookbook/12_cmake_hip_add_executable/CMakeLists.txt b/samples/2_Cookbook/12_cmake_hip_add_executable/CMakeLists.txt index c2a6d60cf3..0e8020a67a 100644 --- a/samples/2_Cookbook/12_cmake_hip_add_executable/CMakeLists.txt +++ b/samples/2_Cookbook/12_cmake_hip_add_executable/CMakeLists.txt @@ -11,6 +11,7 @@ set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH}) project(12_cmake) +set(HIP_CLANG_NUM_PARALLEL_JOBS 2) find_package(HIP QUIET) if(HIP_FOUND) message(STATUS "Found HIP: " ${HIP_VERSION}) From 819677825f1ebdf3ef6f82404ed37bf8c204c3bd Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Tue, 5 May 2020 22:58:40 +0000 Subject: [PATCH 30/33] Remove HIP_MARKER left overs due to HIP PR 2032 Change-Id: Ieae68dd3b12c92b1d6830619ca4c6ae43c400225 --- bin/hipcc | 17 -- docs/markdown/hip_profiling.md | 279 ------------------ samples/2_Cookbook/2_Profiler/Makefile | 53 ---- .../2_Cookbook/2_Profiler/MatrixTranspose.cpp | 219 -------------- samples/2_Cookbook/2_Profiler/Readme.md | 47 --- 5 files changed, 615 deletions(-) delete mode 100644 docs/markdown/hip_profiling.md delete mode 100644 samples/2_Cookbook/2_Profiler/Makefile delete mode 100644 samples/2_Cookbook/2_Profiler/MatrixTranspose.cpp delete mode 100644 samples/2_Cookbook/2_Profiler/Readme.md diff --git a/bin/hipcc b/bin/hipcc index d5ae94f9c3..720a9cffea 100755 --- a/bin/hipcc +++ b/bin/hipcc @@ -248,9 +248,6 @@ if ($HIP_PLATFORM eq "hcc" and $HIP_COMPILER eq "clang") { $HCC_VERSION_MAJOR=$HCC_VERSION; $HCC_VERSION_MAJOR=~s/\..*//; - $HIP_ATP_MARKER=$ENV{'HIP_ATP_MARKER'} // 1; - $marker_path = "$ROCM_PATH/profiler/CXLActivityLogger"; - # HCC* may be used to compile src/hip_hcc.o (and also feed the HIPCXXFLAGS below) $HCC = "$HCC_HOME/bin/hcc"; $HCCFLAGS = "-hc -D__HIPCC__ -isystem $HCC_HOME/include "; @@ -298,20 +295,6 @@ if ($HIP_PLATFORM eq "hcc" and $HIP_COMPILER eq "clang") { $HIPLDFLAGS .= " -L$HSA_PATH/lib -L$ROCM_PATH/lib -lhsa-runtime64 -lhc_am "; # $HIPLDFLAGS .= " -L$HCC_HOME/compiler/lib -lLLVMAMDGPUDesc -lLLVMAMDGPUUtils -lLLVMMC -lLLVMCore -lLLVMSupport "; - # Add trace marker library: - # TODO - once we cleanly separate the HIP API headers from HIP library headers this logic should move to CMakebuild option - apps do not need to see the marker library. - if ($HIP_ATP_MARKER) { - $marker_inc_path = "$marker_path/include"; - if (-e $marker_inc_path) { - $HIPCXXFLAGS .= " -isystem $marker_inc_path"; - } - } - - $marker_lib_path = "$marker_path/bin/x86_64"; - if (-e $marker_lib_path) { - $HIPLDFLAGS .= " -L$marker_lib_path -lCXLActivityLogger -Wl,--rpath=$marker_lib_path"; - } - if (not $isWindows) { $HIPLDFLAGS .= " -lm"; } diff --git a/docs/markdown/hip_profiling.md b/docs/markdown/hip_profiling.md deleted file mode 100644 index 28ed37e321..0000000000 --- a/docs/markdown/hip_profiling.md +++ /dev/null @@ -1,279 +0,0 @@ -# Profiling HIP Code - -This section describes the profiling and debugging capabilities that HIP provides. -Profiling information can viewed in the CodeXL visualization tool or printed directly to stderr as the application runs. -This document starts with some of the general capabilities of CodeXL and then describes some of the additional HIP marker and debug features. - - - -- [CodeXL Profiling](#codexl-profiling) - * [Collecting and Viewing Traces](#collecting-and-viewing-traces) - + [Using rocm-profiler timestamp profiling](#using-rocm-profiler-timestamp-profiling) - + [Using rocm-profiler performance counter collection:](#using-rocm-profiler-performance-counter-collection) - + [Using CodeXL to view profiling results:](#using-codexl-to-view-profiling-results) - + [More information on CodeXL](#more-information-on-codexl) - * [HIP Markers](#hip-markers) - + [Profiling HIP APIs](#profiling-hip-apis) - + [Adding markers to applications](#adding-markers-to-applications) - * [Additional HIP Profiling Features](#additional-hip-profiling-features) - + [Demangling C++ Kernel Names](#demangling-c-kernel-names) - + [Controlling when profiling starts and ends](#controlling-when-profiling-starts-and-ends) - + [Reducing timeline trace output file size](#reducing-timeline-trace-output-file-size) - + [How to enable profiling at HIP build time](#how-to-enable-profiling-at-hip-build-time) -- [Tracing and Debug](#tracing-and-debug) - * [Tracing HIP APIs](#tracing-hip-apis) - + [Color](#color) - - - -## CodeXL Profiling - -### Collecting and Viewing Traces - -#### Using rocm-profiler timestamp profiling -rocm-profiler is a command-line tool for tracing any application that uses ROCr API, including HCC and HIP. -rocm-profiler's timeline trace will show the beginning and end for all kernel commands, data transfer commands, and HSA Runtime (ROCr) API calls. The trace results are saved into a file, which by convention uses the "atp" extension. Here is an example that shows how to run the command-line profiler: -```shell -$ /opt/rocm/bin/rocm-profiler -o -A -T -``` - -#### Using rocm-profiler performance counter collection: -rocm-profiler can record performance counter information to provide greater insight inside a kernel, such as the memory bandwidth, ALU busy percentage, and cache statistics. -Collecting the common set of useful counters requires passing the counter configuration files for two passes: -``` -$ /opt/rocm/bin/rocm-profiler -C -O --counterfile /opt/rocm/profiler/counterfiles/counters_HSA_Fiji_pass1 --counterfile /opt/rocm/profiler/counterfiles/counters_HSA_Fiji_pass2 -``` - - -#### Using CodeXL to view profiling results: -The trace can be loaded and viewed in the CodeXL visualization tool: - -- Open the CodeXL GUI, create an new project, and switch to "Profile Mode": - - $ CodeXL & - - [File->New Project, leave fields as is, just click "OK"] - - [Profile->Switch to Profile Mode] -- Load timestamp tracing results into a timeline view: - - Right click on the project in the CodeXL Explorer view - - Click "Import Session..." - - Select to $HOME/apitrace.atp (or appropriate .atp file if you used another file name) - -- Load the performance counter results - - Right click on the project in the CodeXL Explorer view - - Click "Import Session..." - - Select $HOME/Session1.csv (or appropriate .csv file if you used another file name) - - -#### More information on CodeXL -rocm-profiler --help will show additional options and usage guidelines. - -See this [blog](http://gpuopen.com/getting-up-to-speed-with-the-codexl-gpu-profiler-and-radeon-open-compute/) for more information on profiling ROCm apps (including HIP) with CodeXL. - -The 2.2 version of Windows CodeXL does not correctly handle Linux line-endings. If you are collecting a trace on Linux and then viewing it with the 2.2 Windows CodeXL, first convert the line ending in the .atp file to Windows-style line endings. - -### HIP Markers -#### Profiling HIP APIs -HIP can generate markers at function beginning and end which are displayed on the CodeXL timeline view. -HIP 1.0 compiles marker support by default, and you can enable it by setting the HIP_PROFILE_API environment variable and then running the rocm-profiler: - -```shell - -# Use profile to generate timeline view: -export HIP_PROFILE_API=1 -$ /opt/rocm/bin/rocm-profiler -A -T - -Or -$ /opt/rocm/bin/rocm-profiler -e HIP_PROFILE_API=1 -A -T -``` - -HIP_PROFILE_API supports two levels of information. -- HIP_PROFILE_API=1 : Short format. Print name of API but no arguments. For example: -`hipMemcpy` -- HIP_PROFILE_API=2 : Long format. Print name of API + values of all function arguments. For example: -`hipMemcpy (0x7f32154db010, 0x50446e000, 4000000, hipMemcpyDeviceToHost)` - -#### Adding markers to applications - -Markers can be used to define application-specific events that will be recorded in the ATP file and displayed in the CodeXL GUI. -This can be particularly useful for visualizing how the higher-level phases of application behavior relate to the lower level HIP APIs, kernel launches, and data transfers. -For example, an instrumented machine learning framework could show the beginning and ending of each layer in the network. - -Markers have a specific begin and end time, and can be nested. Nested calls are displayed hierarchically in the CodeXL GUI, with each level of the hierarchy occupying a different row. - -The HIP APis are defined in "hip_profile.h": -``` -#include - -HIP_BEGIN_MARKER(const char *markerName, const char *groupName); -HIP_END_MARKER(); - -HIP_BEGIN_MARKER("Setup", "MyAppGroup"); -// ... -// application code for setup -// ... -HIP_END_MARKER(); -``` - -For C++ codes, HIP also provides a scoped marker which records the start time when constructed and the end time when the scoped marker is destructed at the end of the scope. This provides a convenient, single-line mechanism to record an event that neatly corresponds to a region of code. - -```cxx -void FunctionFoo(...) -{ - HIP_SCOPED_MARKER("FunctionFoo", "MyAppGroup"); // Marker starts recording here. - - // ... - // Function implementation - // ... - - // Marker destroyed here and records end time stamp. -}; -``` - -The HIP marker API is only supported on ROCm platform. The marker macros are defined on CUDA platforms and will compile, but are silently ignored at runtime. - -This [HIP sample](https://github.com/ROCm-Developer-Tools/HIP/tree/master/samples/2_Cookbook/2_Profiler) shows the profiler marker API used in a small application. - -More information on the marker API can be found in the profiler header file and PDF in a ROCm installation: -- /opt/rocm/profiler/CXLActivityLogger/include/CXLActivityLogger.h -- /opt/rocm/profiler/CXLActivityLogger/doc/CXLActivityLogger.pdf - -### Additional HIP Profiling Features -#### Demangling C++ Kernel Names -HIP includes the `hipdemangleatp` tool which can post-process an ATP file to "demangle" C++ names. -Mangled kernel names encode the C++ arguments and other information, and are guaranteed to be unique even for cases such as operator overloading. However, the mangled names can be quite verbose. For example: - -`ZZ39gemm_NoTransA_MICRO_NBK_M_N_K_TS16XMTS4RN2hc16accelerator_viewEPKflS3_lPfliiiiiiffEN3_EC__719__cxxamp_trampolineElililiiiiiiS3_iS3_S4_ff` - -`hipdemangleatp` will convert this into the more readable: -`gemm_NoTransA_MICRO_NBK_M_N_K_TS16XMTS4` - -The `hipdemangleatp` tool operates on the ATP file "in-place" and thus replaces the input file with the demangled version. - -``` -$ hipdemangleatp myfile.atp -``` - -The kernel name is also shown in some of the summary htlm files (Top10 kernels). These can be regenerated from the demangled ATP file by re-running rocm-profiler: -``` -$ rocm-profiler -T --atpfile myfile.atp -``` - -A future version of CodeXL may directly integrate demangle functionality. - - -#### Controlling when profiling starts and ends -hipProfilerStart() and hipProfilerEnd() can be inserted into an application to control which phases of the applications are profiled. -These APIs can be used to skip initialization code or to focus profiling on a desired region, and are particularly useful for large long-running applications. -See the API documentation for more information. These APIs work on both ROCm and CUDA paths. - -On ROCm, the following environment variables can be used to control when profiling occurs: - -``` -HIP_DB_START_API : Comma-separated list of tid.api_seq_num for when to start debug and profiling. -HIP_DB_STOP_API : Comma-separated list of tid.api_seq_num for when to stop debug and profiling. -``` - -HIP/ROCm assigns a monotonically increasing sequence number to the APIs called from each thread. The thread and API sequence number can be used in the above API to control when tracing starts and stops. These flags also control the HIP_DB messages (described below). - -When using these options, start the profiler with profiling disabled: -``` -# ROCm: -$ rocm-profiler --startdisabled ... - -# CUDA: -$ nvprof --profile-from-start-off ... -``` - -This feature is under development. - -#### Reducing timeline trace output file size -If the application is already recording the HIP APIs, the HSA APIs are somewhat redundant and the ATP file size can be substantially reduced by not recording these APIs. HIP includes a text file that lists all of the HSA APIs and can assist in this filtering: - -``` -$ rocm-profiler -F hip/bin/hsa-api-filter-cxl.txt -``` - -This file can be copied and edited to provide more selective HSA event recording. - - -#### How to enable profiling at HIP build time -Pre-built packages of HIP are not built with profiling support enabled.You must enable marker support manually when compiling HIP. - -1. Build HIP with ATP markers enabled -HIP pre-built packages are enabled with ATP marker support by default. -To enable ATP marker support when building HIP from source, use the option ```-DCOMPILE_HIP_ATP_MARKER=1``` during the cmake configure step. Build and install HIP. -```shell -$ mkdir build && cd build -$ cmake .. -DCOMPILE_HIP_ATP_MARKER -$ make install -``` - -2. Install ROCm-Profiler -Installing HIP from the [rocm](http://gpuopen.com/getting-started-with-boltzmann-components-platforms-installation/) pre-built packages, installs the ROCm-Profiler as well. -Alternatively, you can build ROCm-Profiler using the instructions [here](https://github.com/RadeonOpenCompute/ROCm-Profiler#building-the-rocm-profiler). - -3. Recompile the target application - -Then follow the steps above to collect a marker-enabled trace. - - -## Tracing and Debug - -### Tracing HIP APIs -The HIP runtime can print the HIP function strings to stderr using HIP_TRACE_API environment variable. -The trace prints two messages for each API - one at the beginning of the API call (line starts with "<<") and one at the end of the API call (line ends with ">>"). -Here's an example for one API followed by a description for the sections of the trace: - -``` -<> -``` - -- `<> -info: running on device gfx803 -info: allocate host mem ( 7.63 MB) -info: allocate device mem ( 7.63 MB) -<> -<> -info: copy Host2Device -<> -info: launch 'vector_square' kernel -1.5 hipLaunchKernel 'HIP_KERNEL_NAME(vector_square)' gridDim:{512,1,1} groupDim:{256,1,1} sharedMem:+0 stream#0.0 -info: copy Device2Host -<> -info: check result -PASSED! -``` - -HIP_TRACE_API supports multiple levels of debug information: - - 0x1 = print all HIP APIs. This is the most verbose setting; the flags below allow selecting a subset. - - 0x2 = print HIP APIs which initiate GPU kernel commands. Includes hipLaunchKernel, hipLaunchModuleKernel - - 0x4 = print HIP APIs which initiate GPU memory commands. Includes hipMemcpy*, hipMemset*. - - 0x8 = print HIP APIs which allocate or free memory. Includes hipMalloc, hipHostMalloc, hipFree, hipHostFree. - -These can be combined. For example, HIP_TRACE_API=6 shows a concise view of the HIP commands (both kernel and memory) that are sent to the GPU. - - -#### Color -Note this trace mode uses colors. "less -r" can handle raw control characters and will display the debug output in proper colors. -You can change the color used for the trace mode with the HIP_TRACE_API_COLOR environment variable. Possible values are None/Red/Green/Yellow/Blue/Magenta/Cyan/White. -None will disable use of color control codes for both the opening and closing and may be useful when saving the trace file or when a pure text trace is desired. - - - diff --git a/samples/2_Cookbook/2_Profiler/Makefile b/samples/2_Cookbook/2_Profiler/Makefile deleted file mode 100644 index db2d008182..0000000000 --- a/samples/2_Cookbook/2_Profiler/Makefile +++ /dev/null @@ -1,53 +0,0 @@ -HIP_PATH?= $(wildcard /opt/rocm/hip) - -HIPCC=$(HIP_PATH)/bin/hipcc - - -HIPPROFILER=/opt/rocm/bin/rocm-profiler -PROFILER_OPT=-A -o MT.atp -e HIP_PROFILE_API=1 -HIPPROFILER_POST_CMD=$(HIP_PATH)/bin/hipdemangleatp MT.atp - -TARGET=hcc - -SOURCES = MatrixTranspose.cpp -OBJECTS = $(SOURCES:.cpp=.o) - -EXECUTABLE=./MatrixTranspose - -.PHONY: test - - -all: $(EXECUTABLE) profile - - - -OPT =-g -CXXFLAGS =$(OPT) -CXX=$(HIPCC) - - -$(EXECUTABLE): $(OBJECTS) - $(HIPCC) $(OBJECTS) -o $@ - - -profile: $(EXECUTABLE) - $(HIPPROFILER) $(PROFILER_OPT) $(EXECUTABLE) - $(HIPPROFILER_POST_CMD) - - -# Pass option to control start and stop iterations for profiling - see MatrixTranspose.cpp for implementation: -# Note we start profiler in --startdisabled mode - no timing collected until app enabled it via hipProfilerStart() -profile_trigger: $(EXECUTABLE) - $(HIPPROFILER) $(PROFILER_OPT) --startdisabled $(EXECUTABLE) 3 6 - $(HIPPROFILER_POST_CMD) - - -run: $(EXECUTABLE) - $(EXECUTABLE) - - -clean: - rm -f $(EXECUTABLE) - rm -f $(OBJECTS) - rm -f $(HIP_PATH)/src/*.o - diff --git a/samples/2_Cookbook/2_Profiler/MatrixTranspose.cpp b/samples/2_Cookbook/2_Profiler/MatrixTranspose.cpp deleted file mode 100644 index 69266e1288..0000000000 --- a/samples/2_Cookbook/2_Profiler/MatrixTranspose.cpp +++ /dev/null @@ -1,219 +0,0 @@ -/* -Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include - -// hip header file -#include "hip/hip_runtime.h" -#include "hip/hip_profile.h" - -#define WIDTH 1024 - -#define NUM (WIDTH * WIDTH) - -#define THREADS_PER_BLOCK_X 4 -#define THREADS_PER_BLOCK_Y 4 -#define THREADS_PER_BLOCK_Z 1 - -#define ITERATIONS 10 - -// Cmdline parms to control start and stop triggers -int startTriggerIteration = -1; -int stopTriggerIteration = -1; - -// Device (Kernel) function, it must be void -__global__ void matrixTranspose(float* out, float* in, const int width) { - int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; - int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; - - out[y * width + x] = in[x * width + y]; -} - -// CPU implementation of matrix transpose -void matrixTransposeCPUReference(float* output, float* input, const unsigned int width) { - for (unsigned int j = 0; j < width; j++) { - for (unsigned int i = 0; i < width; i++) { - output[i * width + j] = input[j * width + i]; - } - } -} - - -// Use a separate function to demonstrate how to use function name as part of scoped marker: -void runGPU(float* Matrix, float* TransposeMatrix, float* gpuMatrix, float* gpuTransposeMatrix) { - // __func__ is a standard C++ macro which expands to the name of the function, in this case - // "runGPU" - HIP_SCOPED_MARKER(__func__, "MyGroup"); - - for (int i = 0; i < ITERATIONS; i++) { - if (i == startTriggerIteration) { - hipProfilerStart(); - } - if (i == stopTriggerIteration) { - hipProfilerStop(); - } - - float eventMs = 0.0f; - - hipEvent_t start, stop; - hipEventCreate(&start); - hipEventCreate(&stop); - - - // Record the start event - hipEventRecord(start, NULL); - - // Memory transfer from host to device - hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice); - - // Record the stop event - hipEventRecord(stop, NULL); - hipEventSynchronize(stop); - - hipEventElapsedTime(&eventMs, start, stop); - - printf("hipMemcpyHostToDevice time taken = %6.3fms\n", eventMs); - - // Record the start event - hipEventRecord(start, NULL); - - // Lauching kernel from host - hipLaunchKernelGGL(matrixTranspose, - dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y), - dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, gpuTransposeMatrix, - gpuMatrix, WIDTH); - - // Record the stop event - hipEventRecord(stop, NULL); - hipEventSynchronize(stop); - hipEventElapsedTime(&eventMs, start, stop); - - printf("kernel Execution time = %6.3fms\n", eventMs); - - // Record the start event - hipEventRecord(start, NULL); - - // Memory transfer from device to host - hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost); - - // Record the stop event - hipEventRecord(stop, NULL); - hipEventSynchronize(stop); - - hipEventElapsedTime(&eventMs, start, stop); - - printf("hipMemcpyDeviceToHost time taken = %6.3fms\n", eventMs); - } -}; - - -int main(int argc, char* argv[]) { - if (argc >= 2) { - startTriggerIteration = atoi(argv[1]); - printf("info : will start tracing at iteration:%d\n", startTriggerIteration); - } - if (argc >= 3) { - stopTriggerIteration = atoi(argv[2]); - printf("info : will stop tracing at iteration:%d\n", stopTriggerIteration); - } - - float* Matrix; - float* TransposeMatrix; - float* cpuTransposeMatrix; - - float* gpuMatrix; - float* gpuTransposeMatrix; - - hipDeviceProp_t devProp; - hipGetDeviceProperties(&devProp, 0); - - std::cout << "Device name " << devProp.name << std::endl; - - { - // Show example of how to create a "scoped marker". - // The scoped marker records the time spent inside the { scope } of the marker - the begin - // timestamp is at the beginning of the code scope, and the end is recorded when the SCOPE - // exits. This can be viewed in CodeXL timeline relative to other GPU and CPU events. This - // marker captures the time spent in setup including host allocation, initialization, and - // device memory allocation. - HIP_SCOPED_MARKER("Setup", "MyGroup"); - - - Matrix = (float*)malloc(NUM * sizeof(float)); - TransposeMatrix = (float*)malloc(NUM * sizeof(float)); - cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float)); - - // initialize the input data - for (int i = 0; i < NUM; i++) { - Matrix[i] = (float)i * 10.0f; - } - - - // allocate the memory on the device side - hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)); - hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)); - - // FYI, the scoped-marker will be destroyed here when the scope exits, and will record its - // "end" timestamp. - } - - runGPU(Matrix, TransposeMatrix, gpuMatrix, gpuTransposeMatrix); - - - // show how to use explicit begin/end markers: - // We begin the timed region with HIP_BEGIN_MARKER, passing in the markerName and group: - // The region will stop when HIP_END_MARKER is called - // This is another way to mark begin/end - as an alternative to scoped markers. - HIP_BEGIN_MARKER("Check&TearDown", "MyGroup"); - - int errors = 0; - - // CPU MatrixTranspose computation - matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH); - - // verify the results - double eps = 1.0E-6; - for (int i = 0; i < NUM; i++) { - if (std::abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) { - errors++; - } - } - if (errors != 0) { - printf("FAILED: %d errors\n", errors); - } else { - printf("PASSED!\n"); - } - - // free the resources on device side - hipFree(gpuMatrix); - hipFree(gpuTransposeMatrix); - - // free the resources on host side - free(Matrix); - free(TransposeMatrix); - free(cpuTransposeMatrix); - - // This ends the last marker started in this thread, in this case "Check&TearDown" - HIP_END_MARKER(); - - return errors; -} diff --git a/samples/2_Cookbook/2_Profiler/Readme.md b/samples/2_Cookbook/2_Profiler/Readme.md deleted file mode 100644 index 8b32beb348..0000000000 --- a/samples/2_Cookbook/2_Profiler/Readme.md +++ /dev/null @@ -1,47 +0,0 @@ -## Using hipEvents to measure performance ### - -This tutorial is follow-up of the previous two tutorial where we learn how to write our first hip program, in which we compute Matrix Transpose and in second one, we added feature to measure time taken for memory transfer and kernel execution. In this tutorial, we'll explain how to use the codexl/rocm-profiler for hip timeline tracing. Also, we will augment the source code with additional markers so we can see the high-level application flow alongside the information that CodeXL automatically collects. - - -## Introduction: - -CodeXL and rocm-profiler are the tool used for profiling the application, which is of prominent use in optimizing the application by means of finding the memory bottlenecks and etc. - -## Requirement: -[CodeXL Installation](http://gpuopen.com/compute-product/codexl/) - -## prerequiste knowledge: - -Programmers familiar with CUDA, OpenCL will be able to quickly learn and start coding with the HIP API. In case you are not, don't worry. You choose to start with the best one. We'll be explaining everything assuming you are completely new to gpgpu programming. - -## Simple Matrix Transpose - -We will be using the Simple Matrix Transpose source code from the previous tutorial as it is. - -## Using CodeXL markers for HIP Functions - -HIP can generate markers at function being/end which are displayed on the CodeXL timeline view. To do this, you need to install ROCm-Profiler and enable HIP to generate the markers: - -1. Install ROCm-Profiler Installing HIP from the rocm pre-built packages, installs the ROCm-Profiler as well. Alternatively, you can build ROCm-Profiler using the instructions given below. - - -2. Run with profiler enabled to generate ATP file. -(These steps are also captured in the Makefile) -The HIP_PROFILE_API enables display of the HIP APIs on the CodeXL trimeline view. -`/opt/rocm/bin/rocm-profiler -o -A -e HIP_PROFILE_API=1 ` - -##Using HIP_TRACE_API - -You can also print the HIP function strings to stderr using HIP_TRACE_API environment variable. This can also be combined with the more detailed debug information provided by the HIP_DB switch. For example: -`HIP_TRACE_API=1 HIP_DB=0x2 ./myHipApp` -Note this trace mode uses colors. "less -r" can handle raw control characters and will display the debug output in proper colors. - -## More Info: -- [HIP FAQ](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_faq.md) -- [HIP Kernel Language](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_kernel_language.md) -- [HIP Runtime API (Doxygen)](http://rocm-developer-tools.github.io/HIP) -- [HIP Porting Guide](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_porting_guide.md) -- [HIP Terminology](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenL) -- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIP/blob/master/hipify-clang/README.md) -- [Developer/CONTRIBUTING Info](https://github.com/ROCm-Developer-Tools/HIP/blob/master/CONTRIBUTING.md) -- [Release Notes](https://github.com/ROCm-Developer-Tools/HIP/blob/master/RELEASE.md) From 1c1be71b6346b58ee8e84e1596939eeff12afc16 Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Wed, 6 May 2020 20:10:27 +0000 Subject: [PATCH 31/33] Fix missed VDI names in hip-on-rocclr Change-Id: I830feb37a043656136648e92a0c6f1eaae8402d7 --- bin/hipcc | 2 +- bin/hipconfig | 4 ++-- rocclr/hip_internal.hpp | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/bin/hipcc b/bin/hipcc index 720a9cffea..171eddef5c 100755 --- a/bin/hipcc +++ b/bin/hipcc @@ -127,7 +127,7 @@ $HIP_PLATFORM= `$HIP_PATH/bin/hipconfig --platform` // "hcc"; $HIP_VERSION= `$HIP_PATH/bin/hipconfig --version`; #HIP_COMPILER controls whether to use hcc, clang or nvcc for compilation: $HIP_COMPILER= `$HIP_PATH/bin/hipconfig --compiler`; -#HIP_RUNTIME controls whether to use HCC, VDI, or NVCC as the runtime: +#HIP_RUNTIME controls whether to use HCC, ROCclr, or NVCC as the runtime: $HIP_RUNTIME= `$HIP_PATH/bin/hipconfig --runtime`; # If using ROCclr runtime, need to find HIP_ROCclr_HOME diff --git a/bin/hipconfig b/bin/hipconfig index 03b412421b..ecd1449b2e 100755 --- a/bin/hipconfig +++ b/bin/hipconfig @@ -118,8 +118,8 @@ if ($HIP_COMPILER eq "clang") { $CPP_CONFIG = " -D__HIP_PLATFORM_HCC__= -I$HIP_PATH/include -I$HIP_CLANG_PATH/../lib/clang/$HIP_CLANG_VERSION -I$HSA_PATH/include"; } -if ($HIP_RUNTIME eq "VDI") { - $CPP_CONFIG .= " -D__HIP_VDI__"; +if ($HIP_RUNTIME eq "ROCclr") { + $CPP_CONFIG .= " -D__HIP_ROCclr__"; } if ($HIP_PLATFORM eq "nvcc") { $CPP_CONFIG = " -D__HIP_PLATFORM_NVCC__= -I$HIP_PATH/include -I$CUDA_PATH/include"; diff --git a/rocclr/hip_internal.hpp b/rocclr/hip_internal.hpp index eda87cea7b..4a40018745 100755 --- a/rocclr/hip_internal.hpp +++ b/rocclr/hip_internal.hpp @@ -112,12 +112,12 @@ namespace hip { /// HIP Device class class Device { amd::Monitor lock_{"Device lock"}; - /// VDI context + /// ROCclr context amd::Context* context_; /// Device's ID /// Store it here so we don't have to loop through the device list every time int deviceId_; - /// VDI host queue for default streams + /// ROCclr host queue for default streams Stream null_stream_; //Maintain list of user enabled peers std::list userEnabledPeers; @@ -168,11 +168,11 @@ namespace hip { extern void setCurrentDevice(unsigned int index); - /// Get VDI queue associated with hipStream + /// Get ROCclr queue associated with hipStream /// Note: This follows the CUDA spec to sync with default streams /// and Blocking streams extern amd::HostQueue* getQueue(hipStream_t s); - /// Get default stream associated with the VDI context + /// Get default stream associated with the ROCclr context extern amd::HostQueue* getNullStream(amd::Context&); /// Get default stream of the thread extern amd::HostQueue* getNullStream(); From 0231de31642484e98c0b0913a5a6ac7ef35a62ab Mon Sep 17 00:00:00 2001 From: agodavar Date: Thu, 7 May 2020 12:14:24 -0400 Subject: [PATCH 32/33] Fix hip-config.cmake build error in rocBLAS Change-Id: I097f0dac0f67bfc22e9991350bf63bc7bfa8b269 --- hip-config.cmake.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hip-config.cmake.in b/hip-config.cmake.in index 9b5517d1f0..5a67c62383 100644 --- a/hip-config.cmake.in +++ b/hip-config.cmake.in @@ -1,5 +1,5 @@ @PACKAGE_INIT@ - +include(CheckCXXCompilerFlag) include(CMakeFindDependencyMacro OPTIONAL RESULT_VARIABLE _CMakeFindDependencyMacro_FOUND) if (NOT _CMakeFindDependencyMacro_FOUND) macro(find_dependency dep) From af4692f838456b6b9ca1ec7a0a3c454ad33fbe53 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 6 May 2020 17:52:26 -0400 Subject: [PATCH 33/33] Fix generated hip_prof_str.h install again Change-Id: I92ea4bedbcdc92974998c00ca7f0c00098791cb5 --- CMakeLists.txt | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index db72c8dd75..fdf019b9da 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -465,9 +465,10 @@ endif() # Install generated headers # FIXME: Associate with individual targets. -install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/hip - DESTINATION include - FILES_MATCHING PATTERN "*.h*") +install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/hip/hcc_detail + DESTINATION include/hip + FILES_MATCHING PATTERN "*.h*") + ############################# # hip-config