From 94699a7a6f61afdf3fb6d945c932474513cc36bb Mon Sep 17 00:00:00 2001 From: Sarbojit Sarkar Date: Thu, 7 May 2020 03:57:58 -0400 Subject: [PATCH 01/30] Enabling hipGetDeviceFlags required in [SWDEV-229170] Change-Id: I998d37e5847f9651345554bada86df6fce86d1eb --- hipamd/include/hip/hcc_detail/hip_runtime_api.h | 8 ++++++++ hipamd/include/hip/nvcc_detail/hip_runtime_api.h | 4 ++++ hipamd/rocclr/hip_device_runtime.cpp | 10 ++++++++-- hipamd/rocclr/hip_hcc.def.in | 1 + hipamd/rocclr/hip_hcc.map.in | 1 + hipamd/rocclr/hip_internal.hpp | 8 ++++++-- 6 files changed, 28 insertions(+), 4 deletions(-) diff --git a/hipamd/include/hip/hcc_detail/hip_runtime_api.h b/hipamd/include/hip/hcc_detail/hip_runtime_api.h index 7363f904ed..17c34b0ad5 100644 --- a/hipamd/include/hip/hcc_detail/hip_runtime_api.h +++ b/hipamd/include/hip/hcc_detail/hip_runtime_api.h @@ -506,6 +506,14 @@ hipError_t hipFuncSetCacheConfig(const void* func, hipFuncCache_t config); */ hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig* pConfig); +/** + * @brief Gets the flags set for current device + * + * @param [out] flags + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + */ +hipError_t hipGetDeviceFlags(unsigned *flags); /** * @brief The bank width of shared memory on current device is set diff --git a/hipamd/include/hip/nvcc_detail/hip_runtime_api.h b/hipamd/include/hip/nvcc_detail/hip_runtime_api.h index 3890028950..23b8a0619d 100644 --- a/hipamd/include/hip/nvcc_detail/hip_runtime_api.h +++ b/hipamd/include/hip/nvcc_detail/hip_runtime_api.h @@ -1511,6 +1511,10 @@ inline static hipError_t hipProfilerStart() { return hipCUDAErrorTohipError(cuda inline static hipError_t hipProfilerStop() { return hipCUDAErrorTohipError(cudaProfilerStop()); } +inline static hipError_t hipGetDeviceFlags(unsigned int* flags) { + return hipCUDAErrorTohipError(cudaGetDeviceFlags(flags)); +} + inline static hipError_t hipSetDeviceFlags(unsigned int flags) { return hipCUDAErrorTohipError(cudaSetDeviceFlags(flags)); } diff --git a/hipamd/rocclr/hip_device_runtime.cpp b/hipamd/rocclr/hip_device_runtime.cpp index 86a1590533..531f35c732 100644 --- a/hipamd/rocclr/hip_device_runtime.cpp +++ b/hipamd/rocclr/hip_device_runtime.cpp @@ -471,7 +471,12 @@ hipError_t hipGetDeviceCount ( int* count ) { } hipError_t hipGetDeviceFlags ( unsigned int* flags ) { - HIP_RETURN(hipErrorNotSupported); + HIP_INIT_API(hipGetDeviceFlags, flags); + if (flags == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + *flags = hip::getCurrentDevice()->getFlags(); + HIP_RETURN(hipSuccess); } hipError_t hipIpcGetEventHandle ( hipIpcEventHandle_t* handle, hipEvent_t event ) { @@ -531,7 +536,8 @@ hipError_t hipSetDeviceFlags ( unsigned int flags ) { default: break; } - + hip::getCurrentDevice()->setFlags(flags & hipDeviceScheduleMask); + HIP_RETURN(hipSuccess); } diff --git a/hipamd/rocclr/hip_hcc.def.in b/hipamd/rocclr/hip_hcc.def.in index 238d7fe02a..579608e685 100755 --- a/hipamd/rocclr/hip_hcc.def.in +++ b/hipamd/rocclr/hip_hcc.def.in @@ -149,6 +149,7 @@ hipPointerGetAttributes hipProfilerStart hipProfilerStop hipRuntimeGetVersion +hipGetDeviceFlags hipSetDevice hipSetDeviceFlags hipStreamAddCallback diff --git a/hipamd/rocclr/hip_hcc.map.in b/hipamd/rocclr/hip_hcc.map.in index f2491cd283..19da8a6991 100755 --- a/hipamd/rocclr/hip_hcc.map.in +++ b/hipamd/rocclr/hip_hcc.map.in @@ -149,6 +149,7 @@ global: hipProfilerStart; hipProfilerStop; hipRuntimeGetVersion; + hipGetDeviceFlags; hipSetDevice; hipSetDeviceFlags; hipStreamAddCallback; diff --git a/hipamd/rocclr/hip_internal.hpp b/hipamd/rocclr/hip_internal.hpp index 4a40018745..643a43341b 100755 --- a/hipamd/rocclr/hip_internal.hpp +++ b/hipamd/rocclr/hip_internal.hpp @@ -119,12 +119,14 @@ namespace hip { int deviceId_; /// ROCclr host queue for default streams Stream null_stream_; - //Maintain list of user enabled peers + /// Store device flags + unsigned int flags_; + /// Maintain list of user enabled peers std::list userEnabledPeers; public: Device(amd::Context* ctx, int devId): - context_(ctx), deviceId_(devId), null_stream_(this, amd::CommandQueue::Priority::Normal, 0, true) + context_(ctx), deviceId_(devId), null_stream_(this, amd::CommandQueue::Priority::Normal, 0, true), flags_(hipDeviceScheduleSpin) { assert(ctx != nullptr); } ~Device() {} @@ -152,6 +154,8 @@ namespace hip { return hipErrorPeerAccessNotEnabled; } } + unsigned int getFlags() const { return flags_; } + void setFlags(unsigned int flags) { flags_ = flags; } amd::HostQueue* NullStream(bool skip_alloc = false); }; From 89415b66f087a29136e23c9e91b0162e9b8d510e Mon Sep 17 00:00:00 2001 From: agodavar Date: Mon, 4 May 2020 08:26:58 -0400 Subject: [PATCH 02/30] SWDEV-233950:link to /opt/rocm/hip/lib/cmake left behind during uninstall Change-Id: Ideb76b73916881469e39b7a0a4d68d7ec098b4a8 Signed-off-by: agodavar --- hipamd/packaging/hip-hcc.postinst | 2 +- hipamd/packaging/hip-rocclr.postinst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hipamd/packaging/hip-hcc.postinst b/hipamd/packaging/hip-hcc.postinst index 2371b53e17..080c846f40 100755 --- a/hipamd/packaging/hip-hcc.postinst +++ b/hipamd/packaging/hip-hcc.postinst @@ -14,7 +14,7 @@ HIPDIR=$ROCMDIR/hip HIPLIBDIR=$ROCMDIR/hip/lib # Soft-link to library files -HIPLIBFILES=$(ls -A $HIPLIBDIR | grep -v [-/$]) +HIPLIBFILES=$(ls -A $HIPLIBDIR | grep -v "cmake\|[-/$]") mkdir -p $ROCMLIBDIR mkdir -p $ROCMLIBDIR/cmake pushd $ROCMLIBDIR diff --git a/hipamd/packaging/hip-rocclr.postinst b/hipamd/packaging/hip-rocclr.postinst index 2371b53e17..080c846f40 100755 --- a/hipamd/packaging/hip-rocclr.postinst +++ b/hipamd/packaging/hip-rocclr.postinst @@ -14,7 +14,7 @@ HIPDIR=$ROCMDIR/hip HIPLIBDIR=$ROCMDIR/hip/lib # Soft-link to library files -HIPLIBFILES=$(ls -A $HIPLIBDIR | grep -v [-/$]) +HIPLIBFILES=$(ls -A $HIPLIBDIR | grep -v "cmake\|[-/$]") mkdir -p $ROCMLIBDIR mkdir -p $ROCMLIBDIR/cmake pushd $ROCMLIBDIR From c70a32c5a7d26122e8fab366e9cc3805ba37041f Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Thu, 7 May 2020 17:28:07 +0000 Subject: [PATCH 03/30] SWDEV-234954 - Fix hipconfig on Windows Before setting the HIP_RUNTIME and HIP_COMPILER variables, first check the environment if these are set. We should prioritize the environment settings. For windows, it will be set, and also explicitly call perl when invoking hipconfig. Change-Id: I89ad267285239e6d8a897dc681c4af5906e7b9d8 --- hipamd/bin/hipcc | 16 ++++++++++++---- hipamd/bin/hipconfig | 11 +++++++++-- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/hipamd/bin/hipcc b/hipamd/bin/hipcc index 171eddef5c..b346be60e2 100755 --- a/hipamd/bin/hipcc +++ b/hipamd/bin/hipcc @@ -123,12 +123,20 @@ sub delete_temp_dirs { #--- #HIP_PLATFORM controls whether to use hcc (AMD) or nvcc as the platform: -$HIP_PLATFORM= `$HIP_PATH/bin/hipconfig --platform` // "hcc"; -$HIP_VERSION= `$HIP_PATH/bin/hipconfig --version`; #HIP_COMPILER controls whether to use hcc, clang or nvcc for compilation: -$HIP_COMPILER= `$HIP_PATH/bin/hipconfig --compiler`; #HIP_RUNTIME controls whether to use HCC, ROCclr, or NVCC as the runtime: -$HIP_RUNTIME= `$HIP_PATH/bin/hipconfig --runtime`; +if ($isWindows) { + # Windows cannot run perl natively, so hipcc will explicitly call perl + $HIP_PLATFORM= `perl $HIP_PATH/bin/hipconfig --platform`; + $HIP_VERSION= `perl $HIP_PATH/bin/hipconfig --version`; + $HIP_COMPILER= `perl $HIP_PATH/bin/hipconfig --compiler`; + $HIP_RUNTIME= `perl $HIP_PATH/bin/hipconfig --runtime`; +} else { + $HIP_PLATFORM= `$HIP_PATH/bin/hipconfig --platform`; + $HIP_VERSION= `$HIP_PATH/bin/hipconfig --version`; + $HIP_COMPILER= `$HIP_PATH/bin/hipconfig --compiler`; + $HIP_RUNTIME= `$HIP_PATH/bin/hipconfig --runtime`; +} # If using ROCclr runtime, need to find HIP_ROCclr_HOME if (defined $HIP_RUNTIME and $HIP_RUNTIME eq "ROCclr" and !defined $HIP_ROCclr_HOME) { diff --git a/hipamd/bin/hipconfig b/hipamd/bin/hipconfig index ecd1449b2e..d26851f0db 100755 --- a/hipamd/bin/hipconfig +++ b/hipamd/bin/hipconfig @@ -85,6 +85,8 @@ $CUDA_PATH=$ENV{'CUDA_PATH'} // '/usr/local/cuda'; $HCC_HOME=$ENV{'HCC_HOME'} // "$ROCM_PATH/hcc"; $HSA_PATH=$ENV{'HSA_PATH'} // "$ROCM_PATH/hsa"; $HIP_CLANG_PATH=$ENV{'HIP_CLANG_PATH'} // "$ROCM_PATH/llvm/bin"; +# HIP_ROCclr_HOME is used by Windows builds +$HIP_ROCclr_HOME=$ENV{'HIP_ROCclr_HOME'}; #--- #HIP_PLATFORM controls whether to use NVCC or HCC for compilation: @@ -92,8 +94,9 @@ $HIP_PLATFORM=$ENV{'HIP_PLATFORM'}; # Read .hipInfo my %hipInfo = (); parse_config_file("$HIP_PATH/lib/.hipInfo", \%hipInfo); -$HIP_COMPILER = $hipInfo{'HIP_COMPILER'} // "hcc"; -$HIP_RUNTIME = $hipInfo{'HIP_RUNTIME'} // "HCC"; +# Prioritize Env first, otherwise use the hipInfo config file +$HIP_COMPILER = $ENV{'HIP_COMPILER'} // $hipInfo{'HIP_COMPILER'} // "hcc"; +$HIP_RUNTIME = $ENV{'HIP_RUNTIME'} // $hipInfo{'HIP_RUNTIME'} // "HCC"; if (not defined $HIP_PLATFORM) { if (can_run("$HCC_HOME/bin/hcc") or can_run("hcc")) { @@ -112,6 +115,10 @@ if ($HIP_COMPILER eq "hcc") { $CPP_CONFIG = " -D__HIP_PLATFORM_HCC__= -I$HIP_PATH/include -I$HCC_HOME/include -I$HSA_PATH/include"; } if ($HIP_COMPILER eq "clang") { + # Windows does not have clang at linux default path + if (defined $HIP_ROCclr_HOME and (-e "$HIP_ROCclr_HOME/bin/clang" or -e "$HIP_ROCclr_HOME/bin/clang.exe")) { + $HIP_CLANG_PATH = "$HIP_ROCclr_HOME/bin"; + } $HIP_CLANG_VERSION = `$HIP_CLANG_PATH/clang++ --version`; $HIP_CLANG_VERSION=~/.*clang version ([^ ]+).*/; $HIP_CLANG_VERSION=$1; From 72675bf07998da8ecbec29b7af7186d4b4d2cc8a Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 30 Apr 2020 15:20:20 -0400 Subject: [PATCH 04/30] Don't write generated version.h to source directory Change-Id: Ib4e7c3171cd227725ae35fed66167df528a4c431 --- hipamd/.gitignore | 1 - hipamd/CMakeLists.txt | 7 +++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/hipamd/.gitignore b/hipamd/.gitignore index 64cdd493a6..fe07943cad 100644 --- a/hipamd/.gitignore +++ b/hipamd/.gitignore @@ -10,7 +10,6 @@ bin/hipInfo bin/hipBusBandwidth bin/hipDispatchLatency bin/hipify-clang -include/hip/hip_version.h tags samples/1_Utils/hipInfo/hipInfo samples/1_Utils/hipBusBandwidth/hipBusBandwidth diff --git a/hipamd/CMakeLists.txt b/hipamd/CMakeLists.txt index fdf019b9da..5c1c906d28 100755 --- a/hipamd/CMakeLists.txt +++ b/hipamd/CMakeLists.txt @@ -425,7 +425,8 @@ set(_versionInfoHeader #define HIP_VERSION (HIP_VERSION_MAJOR * 100 + HIP_VERSION_MINOR)\n #endif\n ") -file(WRITE "${CMAKE_CURRENT_SOURCE_DIR}/include/hip/hip_version.h" ${_versionInfoHeader}) +file(WRITE "${PROJECT_BINARY_DIR}/include/hip/hip_version.h" ${_versionInfoHeader}) +include_directories(${PROJECT_BINARY_DIR}/include) # Build doxygen documentation find_program(DOXYGEN_EXE doxygen) @@ -468,7 +469,9 @@ endif() install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/hip/hcc_detail DESTINATION include/hip FILES_MATCHING PATTERN "*.h*") - +install(DIRECTORY ${PROJECT_BINARY_DIR}/include/hip + DESTINATION include + FILES_MATCHING PATTERN "*.h*") ############################# # hip-config From e069cf6511fc05167925da8b9bf523c80e6bc9c9 Mon Sep 17 00:00:00 2001 From: agodavar Date: Tue, 5 May 2020 06:12:58 -0400 Subject: [PATCH 05/30] SWDEV-234447:Fix error Use of uninitialized value in concatenation (.) Change-Id: Ia6f8d23f100826438f152c175d10ac3096ee024e --- hipamd/bin/hipcc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hipamd/bin/hipcc b/hipamd/bin/hipcc index b346be60e2..21275289f4 100755 --- a/hipamd/bin/hipcc +++ b/hipamd/bin/hipcc @@ -460,7 +460,7 @@ foreach $arg (@ARGV) $arg = "--cuda-device-only"; } - if(($trimarg eq '-stdlib=libstdc++') and ($setStdLib eq 0)) + if(($trimarg eq '-stdlib=libstdc++') and ($setStdLib eq 0) and $HIP_PLATFORM eq 'hcc' and $HIP_COMPILER eq 'hcc') { $HIPCXXFLAGS .= $HCC_WA_FLAGS; $setStdLib = 1; From 1ff46e24c5378bd22e97e5d3b7ee306990a927ff Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 1 May 2020 09:06:33 -0400 Subject: [PATCH 06/30] Don't add llvm includes to rocclr build There are no llvm includes here. Change-Id: I0567c6e47e717580819f3854782ba08501bd069d --- hipamd/rocclr/CMakeLists.txt | 4 ---- 1 file changed, 4 deletions(-) diff --git a/hipamd/rocclr/CMakeLists.txt b/hipamd/rocclr/CMakeLists.txt index 5158b7935e..c8136f45fd 100644 --- a/hipamd/rocclr/CMakeLists.txt +++ b/hipamd/rocclr/CMakeLists.txt @@ -93,10 +93,6 @@ list ( APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules" ) set(CMAKE_MODULE_PATH${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake" "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules") include_directories(${ROCR_INCLUDES}) -if (DEFINED LLVM_INCLUDES AND NOT ${LLVM_INCLUDES} STREQUAL "") - message(STATUS "LLVM includes found ${LLVM_INCLUDES}") - include_directories(${LLVM_INCLUDES}) -endif() # if (DEFINED LLVM_INCLUDES AND NOT ${LLVM_INCLUDES} STREQUAL "") include_directories(${CMAKE_SOURCE_DIR}) include_directories(${CMAKE_SOURCE_DIR}/include) From 276bfc966765c7a509166112af81cfb059bee282 Mon Sep 17 00:00:00 2001 From: Vlad Sytchenko Date: Thu, 7 May 2020 19:12:10 -0400 Subject: [PATCH 07/30] Fix confusion in hipFuncGetAttribute() Cuda shared == OpenCL local Cuda local == OpenCL private Change-Id: I5a204945ecde35919b9e9def20bbb2662fffea2b --- hipamd/rocclr/hip_module.cpp | 7 +++---- hipamd/rocclr/hip_platform.cpp | 5 +++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/hipamd/rocclr/hip_module.cpp b/hipamd/rocclr/hip_module.cpp index db39b234b4..95c6fc6475 100755 --- a/hipamd/rocclr/hip_module.cpp +++ b/hipamd/rocclr/hip_module.cpp @@ -314,8 +314,7 @@ hipError_t hipFuncGetAttribute(int* value, hipFunction_attribute attrib, hipFunc switch(attrib) { case HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES: - *value = static_cast(wrkGrpInfo->localMemSize_ - - wrkGrpInfo->privateMemSize_); + *value = static_cast(wrkGrpInfo->localMemSize_); break; case HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK: *value = static_cast(wrkGrpInfo->wavefrontPerSIMD_ @@ -325,7 +324,7 @@ hipError_t hipFuncGetAttribute(int* value, hipFunction_attribute attrib, hipFunc *value = 0; break; case HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES: - *value = static_cast(wrkGrpInfo->localMemSize_); + *value = static_cast(wrkGrpInfo->privateMemSize_); break; case HIP_FUNC_ATTRIBUTE_NUM_REGS: *value = static_cast(wrkGrpInfo->availableGPRs_); @@ -340,7 +339,7 @@ hipError_t hipFuncGetAttribute(int* value, hipFunction_attribute attrib, hipFunc *value = 0; break; case HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES: - *value = static_cast(wrkGrpInfo->availableLDSSize_); + *value = static_cast(wrkGrpInfo->availableLDSSize_ - wrkGrpInfo->localMemSize_); break; case HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT: *value = 0; diff --git a/hipamd/rocclr/hip_platform.cpp b/hipamd/rocclr/hip_platform.cpp index 11bd373550..b7768b7330 100755 --- a/hipamd/rocclr/hip_platform.cpp +++ b/hipamd/rocclr/hip_platform.cpp @@ -310,8 +310,9 @@ bool ihipGetFuncAttributes(const char* func_name, amd::Program* program, hipFunc } const device::Kernel::WorkGroupInfo* wginfo = it->second->workGroupInfo(); - func_attr->localSizeBytes = wginfo->localMemSize_; - func_attr->sharedSizeBytes = wginfo->size_; + func_attr->localSizeBytes = wginfo->privateMemSize_; + func_attr->sharedSizeBytes = wginfo->localMemSize_; + func_attr->maxDynamicSharedSizeBytes = wginfo->availableLDSSize_ - wginfo->localMemSize_; func_attr->maxThreadsPerBlock = wginfo->wavefrontSize_; func_attr->numRegs = wginfo->usedVGPRs_; From b5f9d2f8183ff3b461b60fd16a97b28ec16d993e Mon Sep 17 00:00:00 2001 From: Vlad Sytchenko Date: Fri, 8 May 2020 11:08:08 -0400 Subject: [PATCH 08/30] Correct HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK query We should be returning the max workgroup size calculated by the compiler. Change-Id: If86590efbb9b291f470bdbe87e5df992e661c539 --- hipamd/rocclr/hip_module.cpp | 3 +-- hipamd/rocclr/hip_platform.cpp | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/hipamd/rocclr/hip_module.cpp b/hipamd/rocclr/hip_module.cpp index 95c6fc6475..a26249eabb 100755 --- a/hipamd/rocclr/hip_module.cpp +++ b/hipamd/rocclr/hip_module.cpp @@ -317,8 +317,7 @@ hipError_t hipFuncGetAttribute(int* value, hipFunction_attribute attrib, hipFunc *value = static_cast(wrkGrpInfo->localMemSize_); break; case HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK: - *value = static_cast(wrkGrpInfo->wavefrontPerSIMD_ - * wrkGrpInfo->wavefrontSize_); + *value = static_cast(wrkGrpInfo->size_); break; case HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES: *value = 0; diff --git a/hipamd/rocclr/hip_platform.cpp b/hipamd/rocclr/hip_platform.cpp index b7768b7330..c250daef57 100755 --- a/hipamd/rocclr/hip_platform.cpp +++ b/hipamd/rocclr/hip_platform.cpp @@ -313,7 +313,7 @@ bool ihipGetFuncAttributes(const char* func_name, amd::Program* program, hipFunc func_attr->localSizeBytes = wginfo->privateMemSize_; func_attr->sharedSizeBytes = wginfo->localMemSize_; func_attr->maxDynamicSharedSizeBytes = wginfo->availableLDSSize_ - wginfo->localMemSize_; - func_attr->maxThreadsPerBlock = wginfo->wavefrontSize_; + func_attr->maxThreadsPerBlock = wginfo->size_; func_attr->numRegs = wginfo->usedVGPRs_; return true; From 8c2c4c3b274044bcaae07198ee409e8c527acaaa Mon Sep 17 00:00:00 2001 From: Christophe Paquot Date: Fri, 8 May 2020 11:23:58 -0700 Subject: [PATCH 09/30] Don't add a null command to waitList in hipMemcpy SWDEV-235345 Change-Id: Ib1abd0ba6414d081891a9f5209df083c45734aee --- hipamd/rocclr/hip_memory.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/hipamd/rocclr/hip_memory.cpp b/hipamd/rocclr/hip_memory.cpp index 593513c98d..4bd75b8ffb 100755 --- a/hipamd/rocclr/hip_memory.cpp +++ b/hipamd/rocclr/hip_memory.cpp @@ -129,7 +129,10 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin amd::HostQueue* pQueue = &queue; if (queueDevice != dstMemory->getContext().devices()[0]) { pQueue = hip::getNullStream(dstMemory->getContext()); - waitList.push_back(queue.getLastQueuedCommand(true)); + amd::Command* cmd = queue.getLastQueuedCommand(true); + if (cmd != nullptr) { + waitList.push_back(cmd); + } } command = new amd::WriteMemoryCommand(*pQueue, CL_COMMAND_WRITE_BUFFER, waitList, *dstMemory->asBuffer(), dOffset, sizeBytes, src); @@ -138,7 +141,10 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin amd::HostQueue* pQueue = &queue; if (queueDevice != srcMemory->getContext().devices()[0]) { pQueue = hip::getNullStream(srcMemory->getContext()); - waitList.push_back(queue.getLastQueuedCommand(true)); + amd::Command* cmd = queue.getLastQueuedCommand(true); + if (cmd != nullptr) { + waitList.push_back(cmd); + } } command = new amd::ReadMemoryCommand(*pQueue, CL_COMMAND_READ_BUFFER, waitList, *srcMemory->asBuffer(), sOffset, sizeBytes, dst); From a88e52ba80cc0c7b642c4f762109ab0436a5b77f Mon Sep 17 00:00:00 2001 From: Vlad Sytchenko Date: Sat, 9 May 2020 12:42:30 -0400 Subject: [PATCH 10/30] Correct HIP_FUNC_ATTRIBUTE_NUM_REGS query Change-Id: I526cc7871c690260df0fa8c1b3b4b15fbc5af219 --- hipamd/rocclr/hip_module.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hipamd/rocclr/hip_module.cpp b/hipamd/rocclr/hip_module.cpp index a26249eabb..7cda6864c6 100755 --- a/hipamd/rocclr/hip_module.cpp +++ b/hipamd/rocclr/hip_module.cpp @@ -326,7 +326,7 @@ hipError_t hipFuncGetAttribute(int* value, hipFunction_attribute attrib, hipFunc *value = static_cast(wrkGrpInfo->privateMemSize_); break; case HIP_FUNC_ATTRIBUTE_NUM_REGS: - *value = static_cast(wrkGrpInfo->availableGPRs_); + *value = static_cast(wrkGrpInfo->usedVGPRs_); break; case HIP_FUNC_ATTRIBUTE_PTX_VERSION: *value = 30; // Defaults to 3.0 as HCC From 9f31801831ab22c975b6cce7aa745687d8d828c6 Mon Sep 17 00:00:00 2001 From: Icarus Sparry Date: Mon, 4 May 2020 16:36:44 -0400 Subject: [PATCH 11/30] Correct quoting of arguments The hipcc script takes arguments and uses this to build up a new command. Characters which are special to the shell need to be quoted to prevent them being interpreted. In particular adding --Wl,--enable-new-dtags -Wl,--rpath,'$ORIGIN:$ORIGIN/../lib' to the command should pass quoted dollar signs into the resulting string so the shell passes them on, rather than substituting the values. The arguments are processed in a conventional loop, but can be altered during the course of the loop, and also by linker response files. Tested by running HIPCC_VERBOSE=7 HIP_COMPILER=clang hipcc --cxxflags \ fred.c -Wl,,--rpath,'$ORIGIN:$ORIGIN:/../lib' and observing "-Wl,--rpath,\$ORIGIN\:\$ORIGIN\:..\/lib" in the displayed hipcc-cmd output (and ignoring the errors due to rocm not being installed) Change-Id: I26b62f09ff3518cceeb85fa8823bb12a95c1c78e Signed-off-by: Icarus Sparry --- hipamd/bin/hipcc | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/hipamd/bin/hipcc b/hipamd/bin/hipcc index 21275289f4..1ed5ac6736 100755 --- a/hipamd/bin/hipcc +++ b/hipamd/bin/hipcc @@ -401,6 +401,7 @@ if($HIP_PLATFORM eq "nvcc"){ } } +# TODO: convert toolArgs to an array rather than a string my $toolArgs = ""; # arguments to pass to the hcc or nvcc tool my $optArg = ""; # -O args my $targetOpt = '--amdgpu-target='; @@ -410,7 +411,11 @@ my $prevArg = ""; # previous argument foreach $arg (@ARGV) { + # Save $arg, it can get changed in the loop. $trimarg = $arg; + # TODO: figure out why this space removal is wanted. + # TODO: If someone has gone to the effort of quoting the spaces to the shell + # TODO: why are we removing it here? $trimarg =~ s/^\s+|\s+$//g; # Remive whitespace my $swallowArg = 0; if ($arg eq '-c' or $arg eq '--genco' or $arg eq '-E') { @@ -419,6 +424,7 @@ foreach $arg (@ARGV) } if ($skipOutputFile) { + # TODO: handle filename with shell metacharacters $toolArgs .= " $arg"; $prevArg = $arg; $skipOutputFile = 0; @@ -667,6 +673,12 @@ foreach $arg (@ARGV) push (@inputs, $arg); #print "I: <$arg>\n"; } + # Produce a version of $arg where characters significant to the shell are + # quoted. One could quote everything of course but don't bother for + # common characters such as alphanumerics. + # Do the quoting here because sometimes the $arg is changed in the loop + # Important to have all of '-Xlinker' in the set of unquoted characters. + $arg =~ s/[^-a-zA-Z0-9=+,.]/\\$&/g; $toolArgs .= " $arg" unless $swallowArg; $prevArg = $arg; } @@ -813,6 +825,7 @@ if ($HIPCC_LINK_FLAGS_APPEND) { $HIPLDFLAGS .= " $HIPCC_LINK_FLAGS_APPEND"; } +# TODO: convert CMD to an array rather than a string my $CMD="$HIPCC"; if ($needCFLAGS) { From dcf94dcd2a1832034819c01fc7853b16ea442102 Mon Sep 17 00:00:00 2001 From: Dittakavi Satyanvesh Date: Fri, 8 May 2020 07:09:20 -0400 Subject: [PATCH 12/30] search path logic updated for extractkernel SWDEV-230929 Change-Id: I48d6332502774485d7ced3fee065a74f15774500 --- hipamd/bin/extractkernel | 31 ++++++++++++++++++++----------- hipamd/bin/hipconfig | 34 +++++++++++++++++++++++++++++++++- 2 files changed, 53 insertions(+), 12 deletions(-) diff --git a/hipamd/bin/extractkernel b/hipamd/bin/extractkernel index 81760f50de..d12645a996 100755 --- a/hipamd/bin/extractkernel +++ b/hipamd/bin/extractkernel @@ -34,26 +34,35 @@ defined $options{i} || die("input not specified"); $input_file = $options{i}; (-f $input_file) || die("can't find $input_file"); +# derive HIP_PATH via env var or use parent directory of extractkernel +my $HIP_PATH=$ENV{'HIP_PATH'} // dirname(Cwd::abs_path("$0/../")); +my $HIP_COMPILER = `$HIP_PATH/bin/hipconfig --compiler`; +my $ROCM_PATH = `$HIP_PATH/bin/hipconfig --rocmpath`; +my $HIP_CLANG_PATH = `$HIP_PATH/bin/hipconfig --hipclangpath`; + # look for llvm-objdump and clang-offload-bundler my $tools_path_prefix; my $llvm_objdump; my $clang_offload_bundler; -if (defined $ENV{'HCC_HOME'}) { - $tools_path_prefix = File::Spec->catfile($ENV{'HCC_HOME'}, "bin"); - $llvm_objdump = File::Spec->catfile($tools_path_prefix, "llvm-objdump"); - $clang_offload_bundler = File::Spec->catfile($tools_path_prefix, "clang-offload-bundler"); +if (defined $HIP_COMPILER and $HIP_COMPILER eq "clang"){ + # Search the path with respect to HIP_CLANG_PATH + $tools_path_prefix = $HIP_CLANG_PATH; } else { - $tools_path_prefix = dirname(realpath($0)); - $llvm_objdump = File::Spec->catfile($tools_path_prefix, "llvm-objdump"); - $clang_offload_bundler = File::Spec->catfile($tools_path_prefix, "clang-offload-bundler"); - if (!(-f $llvm_objdump)) { - $tools_path_prefix = realpath($tools_path_prefix."/../../hcc/bin"); - $llvm_objdump = File::Spec->catfile($tools_path_prefix, "llvm-objdump"); - $clang_offload_bundler = File::Spec->catfile($tools_path_prefix, "clang-offload-bundler"); + if (defined $HIP_COMPILER and $HIP_COMPILER eq "hcc") { + # Search the path with respect to HCC_HOME if it is set, else search in ROCM_PATH + if (defined $ENV{'HCC_HOME'}) { + $tools_path_prefix = File::Spec->catfile($ENV{'HCC_HOME'}, "bin"); + } + else { + $tools_path_prefix = realpath($ROCM_PATH."/hcc/bin"); + } } } +# Find llvm-objdump and clang-offload-bundler in the path set above +$llvm_objdump = File::Spec->catfile($tools_path_prefix, "llvm-objdump"); +$clang_offload_bundler = File::Spec->catfile($tools_path_prefix, "clang-offload-bundler"); if (!(-f $llvm_objdump)) { $llvm_objdump = which("llvm-objdump"); diff --git a/hipamd/bin/hipconfig b/hipamd/bin/hipconfig index d26851f0db..ddcc70f410 100755 --- a/hipamd/bin/hipconfig +++ b/hipamd/bin/hipconfig @@ -17,9 +17,11 @@ Getopt::Long::Configure ( qw{bundling no_ignore_case}); GetOptions( "help|h" => \$p_help ,"path|p" => \$p_path + ,"rocmpath|R" => \$p_rocmpath ,"compiler|c" => \$p_compiler ,"platform|P" => \$p_platform ,"runtime|r" => \$p_runtime + ,"hipclangpath|l" => \$p_hipclangpath ,"cpp_config|cxx_config|C" => \$p_cpp_config ,"full|f|info" => \$p_full, ,"version|v" => \$p_version, @@ -30,10 +32,12 @@ GetOptions( if ($p_help) { print "usage: hipconfig [OPTIONS]\n"; print " --path, -p : print HIP_PATH (use env var if set, else determine from hipconfig path)\n"; + print " --rocmpath, -R : print ROCM_PATH (use env var if set, else determine from hip path or /opt/rocm)\n"; print " --cpp_config, -C : print C++ compiler options\n"; print " --compiler, -c : print compiler (hcc or clang or nvcc)\n"; print " --platform, -P : print platform (hcc or nvcc)\n"; print " --runtime, -r : print runtime (HCC or ROCclr)\n"; + print " --hipclangpath, -l : print HIP_CLANG_PATH\n"; print " --full, -f : print full config\n"; print " --version, -v : print hip version\n"; print " --check : check configuration\n"; @@ -88,16 +92,31 @@ $HIP_CLANG_PATH=$ENV{'HIP_CLANG_PATH'} // "$ROCM_PATH/llvm/bin"; # HIP_ROCclr_HOME is used by Windows builds $HIP_ROCclr_HOME=$ENV{'HIP_ROCclr_HOME'}; +if (defined $HIP_ROCclr_HOME) { + $HIP_INFO_PATH= "$HIP_ROCclr_HOME/lib/.hipInfo"; +} else { + $HIP_INFO_PATH= "$HIP_PATH/lib/.hipInfo"; # use actual file +} #--- #HIP_PLATFORM controls whether to use NVCC or HCC for compilation: $HIP_PLATFORM=$ENV{'HIP_PLATFORM'}; # Read .hipInfo my %hipInfo = (); -parse_config_file("$HIP_PATH/lib/.hipInfo", \%hipInfo); +parse_config_file("$HIP_INFO_PATH", \%hipInfo); # Prioritize Env first, otherwise use the hipInfo config file $HIP_COMPILER = $ENV{'HIP_COMPILER'} // $hipInfo{'HIP_COMPILER'} // "hcc"; $HIP_RUNTIME = $ENV{'HIP_RUNTIME'} // $hipInfo{'HIP_RUNTIME'} // "HCC"; +# If using ROCclr runtime, need to find HIP_ROCclr_HOME +if (defined $HIP_RUNTIME and $HIP_RUNTIME eq "ROCclr" and !defined $HIP_ROCclr_HOME) { + my $hipconfig_dir = dirname($0); + if (-e "$hipconfig_dir/../lib/bitcode") { + $HIP_ROCclr_HOME = abs_path($hipconfig_dir . "/.."); + } else { + $HIP_ROCclr_HOME = $HIP_PATH; # use HIP_PATH + } +} + if (not defined $HIP_PLATFORM) { if (can_run("$HCC_HOME/bin/hcc") or can_run("hcc")) { $HIP_PLATFORM = "hcc"; @@ -146,6 +165,11 @@ if ($p_path) { $printed = 1; } +if ($p_rocmpath) { + print "$ROCM_PATH"; + $printed = 1; +} + if ($p_cpp_config) { print $CPP_CONFIG; $printed = 1; @@ -166,6 +190,13 @@ if ($p_runtime) { $printed = 1; } +if ($p_hipclangpath) { + if (defined $HIP_CLANG_PATH) { + print $HIP_CLANG_PATH; + } + $printed = 1; +} + if ($p_version) { print $HIP_VERSION; $printed = 1; @@ -175,6 +206,7 @@ if (!$printed or $p_full) { print "HIP version : ", $HIP_VERSION, "\n\n"; print "== hipconfig\n"; print "HIP_PATH : ", $HIP_PATH, "\n"; + print "ROCM_PATH : ", $ROCM_PATH, "\n"; print "HIP_COMPILER : ", $HIP_COMPILER, "\n"; print "HIP_PLATFORM : ", $HIP_PLATFORM, "\n"; print "HIP_RUNTIME : ", $HIP_RUNTIME, "\n"; From 7c707dd41c85cbefcf956c01ffcd8cfe798ff07f Mon Sep 17 00:00:00 2001 From: Michael LIAO Date: Mon, 11 May 2020 10:12:33 -0400 Subject: [PATCH 13/30] [hip] Fix `-Wduplicate-decl-specifier` warning. NFC. Change-Id: Iae48bbb7805c39f1005c920df8e76504426f2d3b --- hipamd/include/hip/hcc_detail/math_functions.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hipamd/include/hip/hcc_detail/math_functions.h b/hipamd/include/hip/hcc_detail/math_functions.h index 11985c3242..494685e261 100644 --- a/hipamd/include/hip/hcc_detail/math_functions.h +++ b/hipamd/include/hip/hcc_detail/math_functions.h @@ -1411,12 +1411,12 @@ float func(float x, int y) \ __DEF_FLOAT_FUN2I(scalbn) template -__DEVICE__ inline static T min(T arg1, T arg2) { +__DEVICE__ inline T min(T arg1, T arg2) { return (arg1 < arg2) ? arg1 : arg2; } template -__DEVICE__ inline static T max(T arg1, T arg2) { +__DEVICE__ inline T max(T arg1, T arg2) { return (arg1 > arg2) ? arg1 : arg2; } From 03ebfd2d498fe3ef656d7bf7866dab0c7af89341 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 8 May 2020 12:29:10 -0400 Subject: [PATCH 14/30] Remove some asm declarations for intrinsics This technique should never be used, and only accessed through __builtins. There's currently no builtin for groupstaticsize. I left ds_swizzle since for some reason it switches to the builtin based on __HCC__ or not. Change-Id: If1e1394221dba83ea4add6db5e94d6b715552044 --- .../include/hip/hcc_detail/device_functions.h | 29 +++++++++-------- .../include/hip/hcc_detail/llvm_intrinsics.h | 31 +------------------ hipamd/tests/src/deviceLib/hip_mbcnt.cpp | 6 ++-- 3 files changed, 20 insertions(+), 46 deletions(-) diff --git a/hipamd/include/hip/hcc_detail/device_functions.h b/hipamd/include/hip/hcc_detail/device_functions.h index eaee437cea..7bc0b97617 100644 --- a/hipamd/include/hip/hcc_detail/device_functions.h +++ b/hipamd/include/hip/hcc_detail/device_functions.h @@ -85,11 +85,11 @@ __device__ static inline unsigned int __ffsll(long long int input) { } __device__ static inline unsigned int __brev(unsigned int input) { - return __llvm_bitrev_b32(input); + return __builtin_bitreverse32(input); } __device__ static inline unsigned long long int __brevll(unsigned long long int input) { - return __llvm_bitrev_b64(input); + return __builtin_bitreverse64(input); } __device__ static inline unsigned int __lastbit_u32_u64(uint64_t input) { @@ -233,7 +233,10 @@ __device__ static inline unsigned int __usad(unsigned int x, unsigned int y, uns return __ockl_sadd_u32(x, y, z); } -__device__ static inline unsigned int __lane_id() { return __mbcnt_hi(-1, __mbcnt_lo(-1, 0)); } +__device__ static inline unsigned int __lane_id() { + return __builtin_amdgcn_mbcnt_hi( + -1, __builtin_amdgcn_mbcnt_lo(-1, 0)); +} /* HIP specific device functions @@ -241,25 +244,25 @@ HIP specific device functions __device__ static inline unsigned __hip_ds_bpermute(int index, unsigned src) { union { int i; unsigned u; float f; } tmp; tmp.u = src; - tmp.i = __llvm_amdgcn_ds_bpermute(index, tmp.i); + tmp.i = __builtin_amdgcn_ds_bpermute(index, tmp.i); return tmp.u; } __device__ static inline float __hip_ds_bpermutef(int index, float src) { union { int i; unsigned u; float f; } tmp; tmp.f = src; - tmp.i = __llvm_amdgcn_ds_bpermute(index, tmp.i); + tmp.i = __builtin_amdgcn_ds_bpermute(index, tmp.i); return tmp.f; } __device__ static inline unsigned __hip_ds_permute(int index, unsigned src) { union { int i; unsigned u; float f; } tmp; tmp.u = src; - tmp.i = __llvm_amdgcn_ds_permute(index, tmp.i); + tmp.i = __builtin_amdgcn_ds_permute(index, tmp.i); return tmp.u; } __device__ static inline float __hip_ds_permutef(int index, float src) { union { int i; unsigned u; float f; } tmp; tmp.u = src; - tmp.i = __llvm_amdgcn_ds_permute(index, tmp.i); + tmp.i = __builtin_amdgcn_ds_permute(index, tmp.i); return tmp.u; } @@ -293,8 +296,8 @@ __device__ static inline float __hip_ds_swizzlef_N(float src) { template __device__ static inline int __hip_move_dpp_N(int src) { - return __llvm_amdgcn_move_dpp(src, dpp_ctrl, row_mask, bank_mask, - bound_ctrl); + return __builtin_amdgcn_mov_dpp(src, dpp_ctrl, row_mask, bank_mask, + bound_ctrl); } static constexpr int warpSize = 64; @@ -304,7 +307,7 @@ inline int __shfl(int var, int src_lane, int width = warpSize) { int self = __lane_id(); int index = src_lane + (self & ~(width-1)); - return __llvm_amdgcn_ds_bpermute(index<<2, var); + return __builtin_amdgcn_ds_bpermute(index<<2, var); } __device__ inline @@ -376,7 +379,7 @@ int __shfl_up(int var, unsigned int lane_delta, int width = warpSize) { int self = __lane_id(); int index = self - lane_delta; index = (index < (self & ~(width-1)))?self:index; - return __llvm_amdgcn_ds_bpermute(index<<2, var); + return __builtin_amdgcn_ds_bpermute(index<<2, var); } __device__ inline @@ -446,7 +449,7 @@ int __shfl_down(int var, unsigned int lane_delta, int width = warpSize) { int self = __lane_id(); int index = self + lane_delta; index = (int)((self&(width-1))+lane_delta) >= width?self:index; - return __llvm_amdgcn_ds_bpermute(index<<2, var); + return __builtin_amdgcn_ds_bpermute(index<<2, var); } __device__ inline @@ -516,7 +519,7 @@ int __shfl_xor(int var, int lane_mask, int width = warpSize) { int self = __lane_id(); int index = self^lane_mask; index = index >= ((self+width)&~(width-1))?self:index; - return __llvm_amdgcn_ds_bpermute(index<<2, var); + return __builtin_amdgcn_ds_bpermute(index<<2, var); } __device__ inline diff --git a/hipamd/include/hip/hcc_detail/llvm_intrinsics.h b/hipamd/include/hip/hcc_detail/llvm_intrinsics.h index dc6fd05c52..330b3d91c2 100644 --- a/hipamd/include/hip/hcc_detail/llvm_intrinsics.h +++ b/hipamd/include/hip/hcc_detail/llvm_intrinsics.h @@ -31,40 +31,11 @@ THE SOFTWARE. #include "hip/hcc_detail/host_defines.h" -__device__ -__attribute__((convergent)) -ulong __llvm_amdgcn_icmp_i32(uint x, uint y, uint z) __asm("llvm.amdgcn.icmp.i32"); - +// FIXME: These should all be removed and proper builtins used. __device__ unsigned __llvm_amdgcn_groupstaticsize() __asm("llvm.amdgcn.groupstaticsize"); -__device__ -unsigned int __llvm_bitrev_b32(unsigned int src0) __asm("llvm.bitreverse.i32"); - -__device__ -uint64_t __llvm_bitrev_b64(uint64_t src0) __asm("llvm.bitreverse.i64"); - -extern -__device__ -__attribute__((const)) -unsigned int __mbcnt_lo(unsigned int x, unsigned int y) __asm("llvm.amdgcn.mbcnt.lo"); - -extern -__device__ -__attribute__((const)) -unsigned int __mbcnt_hi(unsigned int x, unsigned int y) __asm("llvm.amdgcn.mbcnt.hi"); - -__device__ -int __llvm_amdgcn_ds_bpermute(int index, int src) __asm("llvm.amdgcn.ds.bpermute"); - -__device__ -int __llvm_amdgcn_ds_permute(int index, int src) __asm("llvm.amdgcn.ds.permute"); - __device__ int __llvm_amdgcn_ds_swizzle(int index, int pattern) __asm("llvm.amdgcn.ds.swizzle"); -__device__ -int __llvm_amdgcn_move_dpp(int src, int dpp_ctrl, int row_mask, int bank_mask, - bool bound_ctrl) __asm("llvm.amdgcn.mov.dpp.i32"); - #endif diff --git a/hipamd/tests/src/deviceLib/hip_mbcnt.cpp b/hipamd/tests/src/deviceLib/hip_mbcnt.cpp index cd4bfa5daa..2cb958f280 100644 --- a/hipamd/tests/src/deviceLib/hip_mbcnt.cpp +++ b/hipamd/tests/src/deviceLib/hip_mbcnt.cpp @@ -38,11 +38,11 @@ THE SOFTWARE. __global__ void HIP_kernel(unsigned int* mbcnt_lo, unsigned int* mbcnt_hi, unsigned int* lane_id) { int x = blockDim.x * blockIdx.x + threadIdx.x; - mbcnt_lo[x] = __mbcnt_lo(0xFFFFFFFF, 0); - mbcnt_hi[x] = __mbcnt_hi(0xFFFFFFFF, 0); + mbcnt_lo[x] = __builtin_amdgcn_mbcnt_lo(0xFFFFFFFF, 0); + mbcnt_hi[x] = __builtin_amdgcn_mbcnt_hi(0xFFFFFFFF, 0); lane_id[x] = __lane_id(); } - + using namespace std; int main() { From 28cb3e5496b3c3dd50b10d9cfcdefcf57a4fff3e Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 1 May 2020 09:05:56 -0400 Subject: [PATCH 15/30] Start cleaning up rocclr include paths Use target_include_directories instead of global includes. Change-Id: I3adadc905843f152a548c71b2f12701476065207 --- hipamd/rocclr/CMakeLists.txt | 94 +++++++++++++++++++----------------- 1 file changed, 51 insertions(+), 43 deletions(-) diff --git a/hipamd/rocclr/CMakeLists.txt b/hipamd/rocclr/CMakeLists.txt index c8136f45fd..cc3b06a6f7 100644 --- a/hipamd/rocclr/CMakeLists.txt +++ b/hipamd/rocclr/CMakeLists.txt @@ -66,25 +66,6 @@ add_custom_target(gen-prof-api-str-header ALL SOURCES ${PROF_API_HDR}) # Enable profiling API -if(USE_PROF_API EQUAL 1) - find_path(PROF_API_HEADER_DIR prof_protocol.h - HINTS - ${PROF_API_HEADER_PATH} - PATHS - /opt/rocm/roctracer - PATH_SUFFIXES - include/ext - ) - if(NOT PROF_API_HEADER_DIR) - MESSAGE(WARNING "Profiling API header not found. Disabling roctracer integration. Use -DPROF_API_HEADER_PATH=") - else() - add_definitions(-DUSE_PROF_API=1) - include_directories(${PROF_API_HEADER_DIR}) - MESSAGE(STATUS "Profiling API: ${PROF_API_HEADER_DIR}") - endif() -endif() - - if(NOT DEFINED ROCclr_DIR OR NOT DEFINED LIBOCL_STATIC_DIR OR NOT DEFINED LIBROCclr_STATIC_DIR ) # message(FATAL_ERROR "define ROCclr_DIR, LIBOCL_STATIC_DIR\n") @@ -92,31 +73,18 @@ endif() list ( APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules" ) set(CMAKE_MODULE_PATH${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake" "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules") -include_directories(${ROCR_INCLUDES}) - -include_directories(${CMAKE_SOURCE_DIR}) -include_directories(${CMAKE_SOURCE_DIR}/include) -include_directories(${PROJECT_BINARY_DIR}/include) -include_directories(${CMAKE_SOURCE_DIR}/elfio) -include_directories(${CMAKE_SOURCE_DIR}/amdocl) -include_directories(${CMAKE_SOURCE_DIR}/include/hip/hcc_detail/elfio) -include_directories(${ROCclr_DIR}) -include_directories(${ROCclr_DIR}/include) -include_directories(${ROCclr_DIR}/compiler/lib) -include_directories(${ROCclr_DIR}/compiler/lib/include) -include_directories(${ROCclr_DIR}/elf/utils/common) -include_directories(${ROCclr_DIR}/elf/utils/libelf) add_definitions(-DUSE_COMGR_LIBRARY -DCOMGR_DYN_DLL) - find_package(amd_comgr REQUIRED CONFIG - PATHS - /opt/rocm/ - PATH_SUFFIXES - cmake/amd_comgr - lib/cmake/amd_comgr - ) - MESSAGE(STATUS "Code Object Manager found at ${amd_comgr_DIR}.") -include_directories("$") +find_package(amd_comgr REQUIRED CONFIG + PATHS + /opt/rocm/ + PATH_SUFFIXES + cmake/amd_comgr + lib/cmake/amd_comgr) + +message(STATUS "Code Object Manager found at ${amd_comgr_DIR}.") + +include(${LIBROCclr_STATIC_DIR}/amdrocclr_staticTargets.cmake) add_definitions(-DBSD_LIBELF) @@ -143,6 +111,47 @@ add_library(hip64 OBJECT ) set_target_properties(hip64 PROPERTIES POSITION_INDEPENDENT_CODE ON) +target_include_directories(hip64 + PUBLIC + ${PROJECT_SOURCE_DIR}/include + ${PROJECT_BINARY_DIR}/include + PRIVATE + ${CMAKE_SOURCE_DIR}/elfio + ${PROJECT_SOURCE_DIR} + ${PROJECT_SOURCE_DIR}/amdocl + ${PROJECT_SOURCE_DIR}/include/hip/hcc_detail/elfio + ${ROCclr_DIR} + ${ROCclr_DIR}/include + ${ROCclr_DIR}/compiler/lib + ${ROCclr_DIR}/compiler/lib/include + ${ROCclr_DIR}/elf/utils/common + ${ROCclr_DIR}/elf/utils/libelf + ${ROCR_INCLUDES} + $) +target_compile_definitions(hip64 + PRIVATE + $) + + + # Enable profiling API +if(USE_PROF_API EQUAL 1) + find_path(PROF_API_HEADER_DIR prof_protocol.h + HINTS + ${PROF_API_HEADER_PATH} + PATHS + /opt/rocm/roctracer + PATH_SUFFIXES + include/ext) + + if(NOT PROF_API_HEADER_DIR) + message(WARNING "Profiling API header not found. Disabling roctracer integration. Use -DPROF_API_HEADER_PATH=") + else() + target_compile_definitions(hip64 PUBLIC USE_PROF_API=1) + target_include_directories(hip64 PUBLIC ${PROF_API_HEADER_DIR}) + message(STATUS "Profiling API: ${PROF_API_HEADER_DIR}") + endif() +endif() + set_target_properties( hip64 PROPERTIES CXX_STANDARD 14 @@ -153,7 +162,6 @@ add_dependencies(hip64 gen-prof-api-str-header) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -include(${LIBROCclr_STATIC_DIR}/amdrocclr_staticTargets.cmake) add_library(amdhip64 SHARED $ From 005a033c5317be9ba0c69171f923235103f7efa8 Mon Sep 17 00:00:00 2001 From: Payam Date: Mon, 11 May 2020 16:41:35 -0400 Subject: [PATCH 16/30] updating hipDeviceProps_t-< arch flags according to SWDEV-234277 Change-Id: I6238edf9a2df15a5dfe420dbb8723e10a0725f98 --- hipamd/rocclr/hip_device.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hipamd/rocclr/hip_device.cpp b/hipamd/rocclr/hip_device.cpp index 3476ac14fc..8695ef43e5 100644 --- a/hipamd/rocclr/hip_device.cpp +++ b/hipamd/rocclr/hip_device.cpp @@ -188,13 +188,13 @@ hipError_t hipGetDeviceProperties ( hipDeviceProp_t* props, hipDevice_t device ) deviceProps.arch.hasGlobalFloatAtomicExch = 1; deviceProps.arch.hasSharedInt32Atomics = 1; deviceProps.arch.hasSharedFloatAtomicExch = 1; - deviceProps.arch.hasFloatAtomicAdd = 0; + deviceProps.arch.hasFloatAtomicAdd = 1; deviceProps.arch.hasGlobalInt64Atomics = 1; deviceProps.arch.hasSharedInt64Atomics = 1; deviceProps.arch.hasDoubles = 1; - deviceProps.arch.hasWarpVote = 0; - deviceProps.arch.hasWarpBallot = 0; - deviceProps.arch.hasWarpShuffle = 0; + deviceProps.arch.hasWarpVote = 1; + deviceProps.arch.hasWarpBallot = 1; + deviceProps.arch.hasWarpShuffle = 1; deviceProps.arch.hasFunnelShift = 0; deviceProps.arch.hasThreadFenceSystem = 1; deviceProps.arch.hasSyncThreadsExt = 0; From 3c855b543ae62c622437c803f436596ae45b8d08 Mon Sep 17 00:00:00 2001 From: Icarus Sparry Date: Mon, 11 May 2020 14:28:37 -0400 Subject: [PATCH 17/30] Fix runtime failure on windows Signed-off-by: Icarus Sparry Change-Id: I6c991a943e04ef29eff2905becbed0038c3b8ab9 Signed-off-by: Icarus Sparry --- hipamd/bin/hipcc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hipamd/bin/hipcc b/hipamd/bin/hipcc index 1ed5ac6736..79db36703f 100755 --- a/hipamd/bin/hipcc +++ b/hipamd/bin/hipcc @@ -678,7 +678,9 @@ foreach $arg (@ARGV) # common characters such as alphanumerics. # Do the quoting here because sometimes the $arg is changed in the loop # Important to have all of '-Xlinker' in the set of unquoted characters. - $arg =~ s/[^-a-zA-Z0-9=+,.]/\\$&/g; + if (not $isWindows) { # Windows needs different quoting, ignore for now + $arg =~ s/[^-a-zA-Z0-9_=+,.\/]/\\$&/g; + } $toolArgs .= " $arg" unless $swallowArg; $prevArg = $arg; } From 163fdd6b74d620eb08f498b74f2752bc48a144b1 Mon Sep 17 00:00:00 2001 From: Christophe Paquot Date: Mon, 11 May 2020 16:35:13 -0700 Subject: [PATCH 18/30] HPC : Intermittent hangs are observed while running Gromacs benchmarks SWDEV-235579 Move the lock before destroying the queue as there's a multithreaded race condition if the queue is being destroy and right after we set queue_ to nullptr, another thread can call ihipWaitStreams which will then call create on that same stream because queue is now nullptr. Moving the lock on streamSet prevents this from happening because we would remove the stream from that list and therefore ihipWait will not try to call asHostQueue which tries to create the queue if not created yet since the stream won't be in the list anymore Change-Id: I3108657ab403d39d4123e83294fcf1f0880e5563 --- hipamd/rocclr/hip_stream.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hipamd/rocclr/hip_stream.cpp b/hipamd/rocclr/hip_stream.cpp index e4bf4fe192..3bd7d343f7 100644 --- a/hipamd/rocclr/hip_stream.cpp +++ b/hipamd/rocclr/hip_stream.cpp @@ -23,7 +23,7 @@ #include "hip_event.hpp" #include "thread/monitor.hpp" -static amd::Monitor streamSetLock("Guards global stream set"); +static amd::Monitor streamSetLock{"Guards global stream set"}; static std::unordered_set streamSet; // Internal structure for stream callback handler @@ -83,11 +83,11 @@ amd::HostQueue* Stream::asHostQueue(bool skip_alloc) { // ================================================================================================ void Stream::Destroy() { if (queue_ != nullptr) { - queue_->release(); - queue_ = nullptr; - amd::ScopedLock lock(streamSetLock); streamSet.erase(this); + + queue_->release(); + queue_ = nullptr; } delete this; } From a92dc8c25c091896beea2a6a9a7e49777d47288f Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 1 May 2020 09:15:30 -0400 Subject: [PATCH 19/30] Use project relative include dir for generated header paths Change-Id: I8c0834d68e6dac00b41a7059a978cbd2ebb933f9 --- hipamd/rocclr/CMakeLists.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hipamd/rocclr/CMakeLists.txt b/hipamd/rocclr/CMakeLists.txt index cc3b06a6f7..b27100b029 100644 --- a/hipamd/rocclr/CMakeLists.txt +++ b/hipamd/rocclr/CMakeLists.txt @@ -48,9 +48,8 @@ set(PROF_API_HEADER_PATH ${ROCclr_DIR}/platform) # Profiling API support ############################# # Generate profiling API macros/structures header -# FIXME: This should not be writing to the source directory set(PROF_API_STR "${PROJECT_BINARY_DIR}/include/hip/hcc_detail/hip_prof_str.h") -set(PROF_API_HDR "${CMAKE_CURRENT_SOURCE_DIR}/../include/hip/hcc_detail/hip_runtime_api.h") +set(PROF_API_HDR "${PROJECT_SOURCE_DIR}/include/hip/hcc_detail/hip_runtime_api.h") set(PROF_API_SRC "${CMAKE_CURRENT_SOURCE_DIR}") set(PROF_API_GEN "${CMAKE_CURRENT_SOURCE_DIR}/hip_prof_gen.py") set(PROF_API_LOG "${PROJECT_BINARY_DIR}/hip_prof_gen.log.txt") From aaf30025671bc4e25ce671da693ac17ac6444f0b Mon Sep 17 00:00:00 2001 From: kjayapra-amd Date: Tue, 12 May 2020 09:14:58 -0400 Subject: [PATCH 20/30] SWDEV-227602 - Fixing hipFuncGetAttribute parameters Change-Id: I46bd079372f453cabfaa2c709e2b30c69400dd33 --- hipamd/rocclr/hip_platform.cpp | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/hipamd/rocclr/hip_platform.cpp b/hipamd/rocclr/hip_platform.cpp index c250daef57..1378a697e0 100755 --- a/hipamd/rocclr/hip_platform.cpp +++ b/hipamd/rocclr/hip_platform.cpp @@ -309,12 +309,20 @@ bool ihipGetFuncAttributes(const char* func_name, amd::Program* program, hipFunc return false; } - const device::Kernel::WorkGroupInfo* wginfo = it->second->workGroupInfo(); + const device::Kernel* kernel = it->second; + const device::Kernel::WorkGroupInfo* wginfo = kernel->workGroupInfo(); + func_attr->sharedSizeBytes = static_cast(wginfo->localMemSize_); + func_attr->binaryVersion = static_cast(kernel->signature().version()); + func_attr->cacheModeCA = 0; + func_attr->constSizeBytes = 0; func_attr->localSizeBytes = wginfo->privateMemSize_; - func_attr->sharedSizeBytes = wginfo->localMemSize_; - func_attr->maxDynamicSharedSizeBytes = wginfo->availableLDSSize_ - wginfo->localMemSize_; - func_attr->maxThreadsPerBlock = wginfo->size_; - func_attr->numRegs = wginfo->usedVGPRs_; + func_attr->maxDynamicSharedSizeBytes = static_cast(wginfo->availableLDSSize_ + - wginfo->localMemSize_); + + func_attr->maxThreadsPerBlock = static_cast(wginfo->size_); + func_attr->numRegs = static_cast(wginfo->usedVGPRs_); + func_attr->preferredShmemCarveout = 0; + func_attr->ptxVersion = 30; return true; } From 1f0c398a6f76c032c991fff9d13851bf1250cd68 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 30 Apr 2020 09:48:37 -0400 Subject: [PATCH 21/30] Use find_package to find ROCclr Maintain compatability with the old finding for now for the convenience of commit order. Change-Id: I99b236cbb3d61b00650e3da7fe5931d4c4b3fec6 --- hipamd/rocclr/CMakeLists.txt | 53 +++++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/hipamd/rocclr/CMakeLists.txt b/hipamd/rocclr/CMakeLists.txt index b27100b029..aaee0a4d0b 100644 --- a/hipamd/rocclr/CMakeLists.txt +++ b/hipamd/rocclr/CMakeLists.txt @@ -27,22 +27,37 @@ endif() set(USE_PROF_API "1") -if(NOT DEFINED LIBROCclr_STATIC_DIR) - find_path(LIBROCclr_STATIC_DIR - NAMES libamdrocclr_static.a - PATHS /opt/rocm/rocclr - PATH_SUFFIXES lib - ) +# FIXME: Make this required and remove the legacy handling below +set(save_rocclr_dir ${ROCclr_DIR}) +set(save_rocclr_static_dir ${LIBROCclr_STATIC_DIR}) + +find_package(ROCclr CONFIG + PATHS + /opt/rocm + /opt/rocm/rocclr) + +if (NOT ROCclr_FOUND) + if(NOT DEFINED LIBROCclr_STATIC_DIR) + find_path(LIBROCclr_STATIC_DIR + NAMES libamdrocclr_static.a + PATHS /opt/rocm/rocclr + PATH_SUFFIXES lib) + else() + set(LIBROCclr_STATIC_DIR ${save_rocclr_static_dir}) + endif() + + if(NOT DEFINED ROCclr_DIR) + find_path(ROCclr_DIR + NAMES top.hpp + PATH_SUFFIXES include + PATHS /opt/rocm/rocclr) + else() + set(ROCclr_DIR ${save_rocclr_dir}) + endif() + message("Found Static rocclr lib:${LIBROCclr_STATIC_DIR} and rocclr includes: ${ROCclr_DIR}") + include(${LIBROCclr_STATIC_DIR}/amdrocclr_staticTargets.cmake) endif() -if(NOT DEFINED ROCclr_DIR) - find_path(ROCclr_DIR - NAMES top.hpp - PATH_SUFFIXES include - PATHS /opt/rocm/rocclr - ) -endif() -message("Found Static rocclr lib:${LIBROCclr_STATIC_DIR} and rocclr includes: ${ROCclr_DIR}") set(PROF_API_HEADER_PATH ${ROCclr_DIR}/platform) ############################# # Profiling API support @@ -83,8 +98,6 @@ find_package(amd_comgr REQUIRED CONFIG message(STATUS "Code Object Manager found at ${amd_comgr_DIR}.") -include(${LIBROCclr_STATIC_DIR}/amdrocclr_staticTargets.cmake) - add_definitions(-DBSD_LIBELF) add_library(hip64 OBJECT @@ -131,6 +144,14 @@ target_compile_definitions(hip64 PRIVATE $) +if(ROCclr_FOUND) + target_include_directories(hip64 + PRIVATE + $) + target_compile_definitions(hip64 + PRIVATE + $) +endif() # Enable profiling API if(USE_PROF_API EQUAL 1) From 5b60eee02ef6df81ac0d78dad134883b992cbc81 Mon Sep 17 00:00:00 2001 From: Saleel Kudchadker Date: Tue, 12 May 2020 09:37:56 -0700 Subject: [PATCH 22/30] SWDEV-235495 Fix elapsed time calculation This issue happens because we getLastQueuedCommand when recording the event and do end_ - start_ so it takes the ticks for the completion of the last command before event record. This may not happen if one records a marker command for hipEventRecord Change-Id: I1d6b06a5befb3b93f16b67692c59dca25c982e0f --- hipamd/rocclr/hip_event.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hipamd/rocclr/hip_event.cpp b/hipamd/rocclr/hip_event.cpp index f2738169a8..dddadd1bfd 100644 --- a/hipamd/rocclr/hip_event.cpp +++ b/hipamd/rocclr/hip_event.cpp @@ -91,7 +91,7 @@ hipError_t Event::elapsedTime(Event& eStop, float& ms) { } ms = static_cast(static_cast(eStop.event_->profilingInfo().end_ - - event_->profilingInfo().start_))/1000000.f; + event_->profilingInfo().end_))/1000000.f; return hipSuccess; } From 374fd36f317bb839387050a20bb690cd2c223d80 Mon Sep 17 00:00:00 2001 From: Christophe Paquot Date: Tue, 12 May 2020 15:51:52 -0700 Subject: [PATCH 23/30] Add lock to addFatBinary and removeFatBinary In case hipModule(Un)Load is called from different thread as hipInit we need to grab the lock as both are going to modify modules_ Also add some logging for __hipExtractCodeObjectFromFatBinary in case binary isn't found for GPU SWDEV-236032 Change-Id: Icbd72b412502df80d5066cea42a4fbcd5b0b8a98 --- hipamd/rocclr/hip_internal.hpp | 2 ++ hipamd/rocclr/hip_platform.cpp | 6 ++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/hipamd/rocclr/hip_internal.hpp b/hipamd/rocclr/hip_internal.hpp index 643a43341b..4cc0dadd8a 100755 --- a/hipamd/rocclr/hip_internal.hpp +++ b/hipamd/rocclr/hip_internal.hpp @@ -212,6 +212,7 @@ public: void init(); std::vector>* addFatBinary(const void*data) { + amd::ScopedLock lock(lock_); if (initialized_) { digestFatBinary(data, modules_[data]); } @@ -219,6 +220,7 @@ public: } void removeFatBinary(std::vector>* module) { + amd::ScopedLock lock(lock_); for (auto& mod : modules_) { if (&mod.second == module) { modules_.erase(&mod); diff --git a/hipamd/rocclr/hip_platform.cpp b/hipamd/rocclr/hip_platform.cpp index 1378a697e0..4b94d01045 100755 --- a/hipamd/rocclr/hip_platform.cpp +++ b/hipamd/rocclr/hip_platform.cpp @@ -122,10 +122,12 @@ hipError_t __hipExtractCodeObjectFromFatBinary(const void* data, num_code_objs++; } } - if (num_code_objs == devices.size()) + if (num_code_objs == devices.size()) { return hipSuccess; - else + } else { + DevLogError("hipErrorNoBinaryForGpu: Coudn't find binary for current devices!"); return hipErrorNoBinaryForGpu; + } } extern "C" std::vector>* __hipRegisterFatBinary(const void* data) From 2cc0fb93817895aeef44dda6342071d0948228a3 Mon Sep 17 00:00:00 2001 From: Christophe Paquot Date: Wed, 13 May 2020 00:26:19 -0700 Subject: [PATCH 24/30] Make sure to remove the global var from amd::MemObjMap We need this otherwise ROCr can give us a matching address for another allocation and doing "insert" in ROCclr will not update the map with the newest object. We would then end up using stale objects (yikes) SWDEV-234992 Change-Id: I3475adf9781a9309d64a024fae45181d7e5afb04 --- hipamd/rocclr/hip_platform.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/hipamd/rocclr/hip_platform.cpp b/hipamd/rocclr/hip_platform.cpp index 4b94d01045..e23b136cc0 100755 --- a/hipamd/rocclr/hip_platform.cpp +++ b/hipamd/rocclr/hip_platform.cpp @@ -229,6 +229,11 @@ std::vector< std::pair >* PlatformState::unregisterVar(hipMod = reinterpret_cast *>(dvar.shadowVptr); delete tex_hptr; } + for (size_t dev = 0; dev < g_devices.size(); ++dev) { + if (dvar.rvars[dev].getdeviceptr()) { + amd::MemObjMap::RemoveMemObj(dvar.rvars[dev].getdeviceptr()); + } + } vars_.erase(it++); } else { ++it; From 5067cd1dd1c03512ba39d6bccde411c7966f2be8 Mon Sep 17 00:00:00 2001 From: Vlad Sytchenko Date: Tue, 12 May 2020 16:53:18 -0400 Subject: [PATCH 25/30] Correctly check if env vars are set Change-Id: I365da786c822e9395aec5c6b75753ba297da3f64 --- hipamd/cmake/FindHIP.cmake | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/hipamd/cmake/FindHIP.cmake b/hipamd/cmake/FindHIP.cmake index cc7f4af20c..498b5e4570 100644 --- a/hipamd/cmake/FindHIP.cmake +++ b/hipamd/cmake/FindHIP.cmake @@ -615,23 +615,25 @@ macro(HIP_ADD_EXECUTABLE hip_target) endif() if("${HIP_COMPILER}" STREQUAL "hcc") if("x${HCC_HOME}" STREQUAL "x") - if (DEFINED $ENV{ROCM_PATH}) - set(HCC_HOME "$ENV{ROCM_PATH}/hcc") - elseif( DEFINED $ENV{HIP_PATH}) - set(HCC_HOME "$ENV{HIP_PATH}/../hcc") + if (DEFINED ENV{ROCM_PATH}) + set(HCC_HOME "$ENV{ROCM_PATH}/hcc") + elseif(DEFINED ENV{HIP_PATH}) + set(HCC_HOME "$ENV{HIP_PATH}/../hcc") else() - set(HCC_HOME "/opt/rocm/hcc") + set(HCC_HOME "/opt/rocm/hcc") endif() endif() set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_HOME} -o ") elseif("${HIP_COMPILER}" STREQUAL "clang") - if("x${HIP_CLANG_PATH}" STREQUAL "x") - if (DEFINED $ENV{ROCM_PATH}) - set(HIP_CLANG_PATH "$ENV{ROCM_PATH}/llvm/bin") - elseif( DEFINED $ENV{HIP_PATH}) - set(HIP_CLANG_PATH "$ENV{HIP_PATH}/../llvm/bin") + if("x${HIP_CLANG_PATH}" STREQUAL "x") + if(DEFINED ENV{HIP_CLANG_PATH}) + set(HIP_CLANG_PATH $ENV{HIP_CLANG_PATH}) + elseif(DEFINED ENV{ROCM_PATH}) + set(HIP_CLANG_PATH "$ENV{ROCM_PATH}/llvm/bin") + elseif(DEFINED ENV{HIP_PATH}) + set(HIP_CLANG_PATH "$ENV{HIP_PATH}/../llvm/bin") else() - set(HIP_CLANG_PATH "/opt/rocm/llvm/bin") + set(HIP_CLANG_PATH "/opt/rocm/llvm/bin") endif() endif() set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HIP_CLANG_PATH} ${HIP_CLANG_PARALLEL_BUILD_LINK_OPTIONS} -o ") From 999295b3faf7b96a39733d295ec7a3b3ce9f0f04 Mon Sep 17 00:00:00 2001 From: German Andryeyev Date: Wed, 13 May 2020 14:12:20 -0400 Subject: [PATCH 26/30] Correct mgpu coop launch test When the original size is devided accross all GPUs rounding can occur, causing incorrect validation. Readjust the final value for comparison to the new size accordingly. Change-Id: I9b42149e33dfcb328de7419e546a0202a69a8610 --- .../module/hipLaunchCoopMultiKernel.cpp | 51 +++++++++---------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/hipamd/tests/src/runtimeApi/module/hipLaunchCoopMultiKernel.cpp b/hipamd/tests/src/runtimeApi/module/hipLaunchCoopMultiKernel.cpp index 102387cbe7..8e67044eb0 100644 --- a/hipamd/tests/src/runtimeApi/module/hipLaunchCoopMultiKernel.cpp +++ b/hipamd/tests/src/runtimeApi/module/hipLaunchCoopMultiKernel.cpp @@ -37,18 +37,13 @@ THE SOFTWARE. using namespace std::chrono; -const static uint NumOfLoopIterrations = 16 * 1024; -const static uint BufferSizeInDwords = 28672 * NumOfLoopIterrations; +const static uint BufferSizeInDwords = 256 * 1024 * 1024; const static uint numQueues = 4; const static uint numIter = 100; constexpr uint NumKernelArgs = 4; constexpr uint MaxGPUs = 8; #include -/* -namespace cg = cooperative_groups; -using namespace cooperative_groups; -*/ __global__ void test_gws(uint* buf, uint bufSize, long* tmpBuf, long* result) { @@ -126,11 +121,13 @@ int main() { size_t SIZE = copySizeInDwords * sizeof(uint); HIPCHECK(hipMalloc((void**)&dA[i], SIZE)); + HIPCHECK(hipMalloc((void**)&dB[i], 64 * deviceProp[i].multiProcessorCount * sizeof(long))); if (i == 0) { HIPCHECK(hipHostMalloc((void**)&dC, (nGpu + 1) * sizeof(long), hipHostMallocCoherent)); } HIPCHECK(hipMemcpy(dA[i], &init[i * copySizeInDwords] , SIZE, hipMemcpyHostToDevice)); HIPCHECK(hipStreamCreate(&stream[i])); + hipDeviceSynchronize(); } dim3 dimBlock; @@ -146,22 +143,22 @@ int main() { uint workgroups[3] = {64, 128, 256}; hipLaunchParams* launchParamsList = new hipLaunchParams[nGpu]; - - system_clock::time_point start = system_clock::now(); - + std::time_t end_time; + double time = 0; for (uint set = 0; set < 3; ++set) { void* args[MaxGPUs * NumKernelArgs]; - std::cout << "---------- Test#" << set << "---------------\n"; + std::cout << "---------- Test#" << set << ", size: "<< BufferSizeInDwords << + " dwords ---------------\n"; for (int i = 0; i < nGpu; i++) { HIPCHECK(hipSetDevice(i)); dimBlock.x = workgroups[set]; HIPCHECK(hipOccupancyMaxActiveBlocksPerMultiprocessor(&numBlocks, test_gws, dimBlock.x * dimBlock.y * dimBlock.z, dimBlock.x * sizeof(long))); - - std::cout << "GPU(" << i << ") Block size: " << dimBlock.x << " Num blocks per CU: " << numBlocks << "\n"; + + std::cout << "GPU(" << i << ") Block size: " << dimBlock.x << + " Num blocks per CU: " << numBlocks << "\n"; dimGrid.x = deviceProp[i].multiProcessorCount * std::min(numBlocks, 32); - HIPCHECK(hipMalloc((void**)&dB[i], dimGrid.x * sizeof(long))); args[i * NumKernelArgs] = (void*)&dA[i]; args[i * NumKernelArgs + 1] = (void*)©SizeInDwords; @@ -175,32 +172,34 @@ int main() { launchParamsList[i].stream = stream[i]; launchParamsList[i].args = &args[i * NumKernelArgs]; } - - hipLaunchCooperativeKernelMultiDevice(launchParamsList, nGpu, 0); - if (*dC != (((long)(BufferSizeInDwords) * (BufferSizeInDwords - 1)) / 2)) { - std::cout << "Data validation failed for grid size = " << dimGrid.x << " and block size = " << dimBlock.x << "\n"; + system_clock::time_point start = system_clock::now(); + hipLaunchCooperativeKernelMultiDevice(launchParamsList, nGpu, 0); + system_clock::time_point end = system_clock::now(); + std::chrono::duration elapsed_seconds = end - start; + end_time = std::chrono::system_clock::to_time_t(end); + + time += elapsed_seconds.count(); + + size_t processedDwords = copySizeInDwords * nGpu; + if (*dC != (((long)(processedDwords) * (processedDwords - 1)) / 2)) { + std::cout << "Data validation failed ("<< *dC << " != " << + (((long)(BufferSizeInDwords) * (BufferSizeInDwords - 1)) / 2) << + ") for grid size = " << dimGrid.x << " and block size = " << dimBlock.x << "\n"; std::cout << "Test failed! \n"; } - for (int i = 0; i < nGpu; i++) { - hipFree(dB[i]); - } } - system_clock::time_point end = system_clock::now(); delete [] launchParamsList; - std::chrono::duration elapsed_seconds = end - start; - - std::time_t end_time = std::chrono::system_clock::to_time_t(end); - std::cout << "finished computation at " << std::ctime(&end_time) << - "elapsed time: " << elapsed_seconds.count() << "s\n"; + "elapsed time: " << time << "s\n"; hipSetDevice(0); hipFree(dC); for (int i = 0; i < nGpu; i++) { hipFree(dA[i]); + hipFree(dB[i]); HIPCHECK(hipStreamDestroy(stream[i])); } delete [] init; From e61b8dc425878449a7b6a58de5bb066e9a35bc83 Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Wed, 13 May 2020 18:53:31 +0000 Subject: [PATCH 27/30] Fix missing hip_prof_str.h in hip-base package Change-Id: Icd163ff83fadc2ee0e68f5755c652a45d2e781e5 --- hipamd/CMakeLists.txt | 9 +++++++-- hipamd/packaging/hip-base.txt | 4 ++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/hipamd/CMakeLists.txt b/hipamd/CMakeLists.txt index 5c1c906d28..16c3f11edf 100755 --- a/hipamd/CMakeLists.txt +++ b/hipamd/CMakeLists.txt @@ -234,7 +234,7 @@ endif (NOT CPACK_SET_DESTDIR) # Generate profiling API macros/structures header if(HIP_PLATFORM STREQUAL "hcc") if(USE_PROF_API EQUAL 1) -set(PROF_API_STR "${CMAKE_CURRENT_SOURCE_DIR}/include/hip/hcc_detail/hip_prof_str.h") +set(PROF_API_STR "${PROJECT_BINARY_DIR}/include/hip/hcc_detail/hip_prof_str.h") set(PROF_API_HDR "${CMAKE_CURRENT_SOURCE_DIR}/include/hip/hcc_detail/hip_runtime_api.h") set(PROF_API_SRC "${CMAKE_CURRENT_SOURCE_DIR}/src") set(PROF_API_GEN "${CMAKE_CURRENT_SOURCE_DIR}/hip_prof_gen.py") @@ -315,6 +315,7 @@ endif() message(STATUS "\nHSA runtime in: " ${HSA_PATH}) # Build hip_hcc if platform is hcc if(HIP_PLATFORM STREQUAL "hcc") + include_directories(${PROJECT_BINARY_DIR}/include) include_directories(${PROJECT_SOURCE_DIR}/include) set(HIP_HCC_BUILD_FLAGS) @@ -458,7 +459,11 @@ install(FILES ${PROJECT_BINARY_DIR}/.hipVersion DESTINATION bin) execute_process(COMMAND test ${CMAKE_INSTALL_PREFIX} -ef ${CMAKE_CURRENT_SOURCE_DIR} RESULT_VARIABLE INSTALL_SOURCE) if(NOT ${INSTALL_SOURCE} EQUAL 0) - install(DIRECTORY src DESTINATION .) + if(HIP_RUNTIME STREQUAL "HCC") + install(DIRECTORY src DESTINATION .) + elseif(HIP_RUNTIME STREQUAL "ROCclr") + install(DIRECTORY rocclr DESTINATION .) + endif() install(DIRECTORY bin DESTINATION . USE_SOURCE_PERMISSIONS) install(DIRECTORY include DESTINATION .) install(DIRECTORY cmake DESTINATION .) diff --git a/hipamd/packaging/hip-base.txt b/hipamd/packaging/hip-base.txt index fc8becf84f..971b2ce018 100644 --- a/hipamd/packaging/hip-base.txt +++ b/hipamd/packaging/hip-base.txt @@ -3,6 +3,10 @@ project(hip_base) install(DIRECTORY @hip_SOURCE_DIR@/bin DESTINATION . USE_SOURCE_PERMISSIONS) install(DIRECTORY @hip_SOURCE_DIR@/include DESTINATION .) +install(FILES @PROJECT_BINARY_DIR@/include/hip/hcc_detail/hip_prof_str.h + DESTINATION include/hip) +install(FILES @PROJECT_BINARY_DIR@/include/hip/hip_version.h + DESTINATION include) install(FILES @PROJECT_BINARY_DIR@/.hipVersion DESTINATION bin) install(PROGRAMS @PROJECT_BINARY_DIR@/lpl DESTINATION bin) install(PROGRAMS @PROJECT_BINARY_DIR@/ca DESTINATION bin) From 6152b200cbb4a10d07db52cd058116f1f8b53520 Mon Sep 17 00:00:00 2001 From: Payam Date: Wed, 13 May 2020 17:17:48 -0400 Subject: [PATCH 28/30] adding support to disable lazy loading, HIP_ENABLE_LAZY_KERNEL_LOADING Change-Id: Iafe0f396570ee2bccf642b537cbf9aad967e1370 --- hipamd/rocclr/hip_platform.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/hipamd/rocclr/hip_platform.cpp b/hipamd/rocclr/hip_platform.cpp index e23b136cc0..8759ef47f1 100755 --- a/hipamd/rocclr/hip_platform.cpp +++ b/hipamd/rocclr/hip_platform.cpp @@ -191,6 +191,13 @@ void PlatformState::init() for (auto& it : vars_) { it.second.rvars.resize(g_devices.size()); } + if (!HIP_ENABLE_LAZY_KERNEL_LOADING) { + for (size_t i = 0; i < g_devices.size(); ++i) { + for (auto& it: functions_) { + getFunc(it.first, i); + } + } + } } bool PlatformState::unregisterFunc(hipModule_t hmod) { From 4638f0f11cd1ed9a894ebe33251d538694893ce8 Mon Sep 17 00:00:00 2001 From: Payam Date: Thu, 14 May 2020 12:58:26 -0400 Subject: [PATCH 29/30] updating envar to HIP_ROCCLR_HOME Change-Id: Ic0bbf40638538307377e6db1a7379fb23dec8306 --- hipamd/bin/hipcc | 34 +++++++++++++++++----------------- hipamd/bin/hipconfig | 20 ++++++++++---------- 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/hipamd/bin/hipcc b/hipamd/bin/hipcc index 79db36703f..fc6834997e 100755 --- a/hipamd/bin/hipcc +++ b/hipamd/bin/hipcc @@ -26,7 +26,7 @@ use Cwd 'abs_path'; # script's abs_path). Used on AMD platforms only. # HSA_PATH : Path to HSA dir (defaults to ../../hsa relative to abs_path # of this script). Used on AMD platforms only. -# HIP_ROCclr_HOME : Path to HIP/ROCclr directory. Used on AMD platforms only. +# HIP_ROCCLR_HOME : Path to HIP/ROCclr directory. Used on AMD platforms only. # HIP_CLANG_PATH : Path to HIP-Clang (default to ../../llvm/bin relative to this # script's abs_path). Used on AMD platforms only. @@ -82,15 +82,15 @@ if (-e "$HIP_PATH/../.info/version") { } else { $ROCM_PATH=$ENV{'ROCM_PATH'} // "/opt/rocm"; } -$HIP_ROCclr_HOME=$ENV{'HIP_ROCclr_HOME'}; +$HIP_ROCCLR_HOME=$ENV{'HIP_ROCCLR_HOME'}; $HIP_LIB_PATH=$ENV{'HIP_LIB_PATH'}; $HIP_CLANG_PATH=$ENV{'HIP_CLANG_PATH'}; $DEVICE_LIB_PATH=$ENV{'DEVICE_LIB_PATH'}; $HIP_CLANG_HCC_COMPAT_MODE=$ENV{'HIP_CLANG_HCC_COMPAT_MODE'}; # HCC compatibility mode $HIP_COMPILE_CXX_AS_HIP=$ENV{'HIP_COMPILE_CXX_AS_HIP'} // "1"; -if (defined $HIP_ROCclr_HOME) { - $HIP_INFO_PATH= "$HIP_ROCclr_HOME/lib/.hipInfo"; +if (defined $HIP_ROCCLR_HOME) { + $HIP_INFO_PATH= "$HIP_ROCCLR_HOME/lib/.hipInfo"; } else { $HIP_INFO_PATH= "$HIP_PATH/lib/.hipInfo"; # use actual file } @@ -138,28 +138,28 @@ if ($isWindows) { $HIP_RUNTIME= `$HIP_PATH/bin/hipconfig --runtime`; } -# If using ROCclr runtime, need to find HIP_ROCclr_HOME -if (defined $HIP_RUNTIME and $HIP_RUNTIME eq "ROCclr" and !defined $HIP_ROCclr_HOME) { +# If using ROCclr runtime, need to find HIP_ROCCLR_HOME +if (defined $HIP_RUNTIME and $HIP_RUNTIME eq "ROCclr" and !defined $HIP_ROCCLR_HOME) { my $hipcc_dir = dirname($0); if (-e "$hipcc_dir/../lib/bitcode") { - $HIP_ROCclr_HOME = abs_path($hipcc_dir . "/.."); + $HIP_ROCCLR_HOME = abs_path($hipcc_dir . "/.."); } else { - $HIP_ROCclr_HOME = $HIP_PATH; # use HIP_PATH + $HIP_ROCCLR_HOME = $HIP_PATH; # use HIP_PATH } $HIPCXXFLAGS .= "-D__HIP_ROCclr__"; $HIPCFLAGS .= "-D__HIP_ROCclr__"; } -if (defined $HIP_ROCclr_HOME) { - if (!defined $HIP_CLANG_PATH and (-e "$HIP_ROCclr_HOME/bin/clang" or -e "$HIP_ROCclr_HOME/bin/clang.exe")) { - $HIP_CLANG_PATH = "$HIP_ROCclr_HOME/bin"; +if (defined $HIP_ROCCLR_HOME) { + if (!defined $HIP_CLANG_PATH and (-e "$HIP_ROCCLR_HOME/bin/clang" or -e "$HIP_ROCCLR_HOME/bin/clang.exe")) { + $HIP_CLANG_PATH = "$HIP_ROCCLR_HOME/bin"; } - if (!defined $DEVICE_LIB_PATH and -e "$HIP_ROCclr_HOME/lib/bitcode") { - $DEVICE_LIB_PATH = "$HIP_ROCclr_HOME/lib/bitcode"; + if (!defined $DEVICE_LIB_PATH and -e "$HIP_ROCCLR_HOME/lib/bitcode") { + $DEVICE_LIB_PATH = "$HIP_ROCCLR_HOME/lib/bitcode"; } - $HIP_INCLUDE_PATH = "$HIP_ROCclr_HOME/include"; + $HIP_INCLUDE_PATH = "$HIP_ROCCLR_HOME/include"; if (!defined $HIP_LIB_PATH) { - $HIP_LIB_PATH = "$HIP_ROCclr_HOME/lib"; + $HIP_LIB_PATH = "$HIP_ROCCLR_HOME/lib"; } } @@ -207,8 +207,8 @@ if ($HIP_PLATFORM eq "hcc" and $HIP_COMPILER eq "clang") { $HIP_LIB_PATH = "$HIP_PATH/lib"; } if ($verbose & 0x2) { - if (defined $HIP_ROCclr_HOME) { - print ("HIP_ROCclr_HOME=$HIP_ROCclr_HOME\n"); + if (defined $HIP_ROCCLR_HOME) { + print ("HIP_ROCCLR_HOME=$HIP_ROCCLR_HOME\n"); } print ("HIP_CLANG_PATH=$HIP_CLANG_PATH\n"); print ("HIP_CLANG_INCLUDE_PATH=$HIP_CLANG_INCLUDE_PATH\n"); diff --git a/hipamd/bin/hipconfig b/hipamd/bin/hipconfig index ddcc70f410..9b10bf7110 100755 --- a/hipamd/bin/hipconfig +++ b/hipamd/bin/hipconfig @@ -89,11 +89,11 @@ $CUDA_PATH=$ENV{'CUDA_PATH'} // '/usr/local/cuda'; $HCC_HOME=$ENV{'HCC_HOME'} // "$ROCM_PATH/hcc"; $HSA_PATH=$ENV{'HSA_PATH'} // "$ROCM_PATH/hsa"; $HIP_CLANG_PATH=$ENV{'HIP_CLANG_PATH'} // "$ROCM_PATH/llvm/bin"; -# HIP_ROCclr_HOME is used by Windows builds -$HIP_ROCclr_HOME=$ENV{'HIP_ROCclr_HOME'}; +# HIP_ROCCLR_HOME is used by Windows builds +$HIP_ROCCLR_HOME=$ENV{'HIP_ROCCLR_HOME'}; -if (defined $HIP_ROCclr_HOME) { - $HIP_INFO_PATH= "$HIP_ROCclr_HOME/lib/.hipInfo"; +if (defined $HIP_ROCCLR_HOME) { + $HIP_INFO_PATH= "$HIP_ROCCLR_HOME/lib/.hipInfo"; } else { $HIP_INFO_PATH= "$HIP_PATH/lib/.hipInfo"; # use actual file } @@ -107,13 +107,13 @@ parse_config_file("$HIP_INFO_PATH", \%hipInfo); $HIP_COMPILER = $ENV{'HIP_COMPILER'} // $hipInfo{'HIP_COMPILER'} // "hcc"; $HIP_RUNTIME = $ENV{'HIP_RUNTIME'} // $hipInfo{'HIP_RUNTIME'} // "HCC"; -# If using ROCclr runtime, need to find HIP_ROCclr_HOME -if (defined $HIP_RUNTIME and $HIP_RUNTIME eq "ROCclr" and !defined $HIP_ROCclr_HOME) { +# If using ROCclr runtime, need to find HIP_ROCCLR_HOME +if (defined $HIP_RUNTIME and $HIP_RUNTIME eq "ROCclr" and !defined $HIP_ROCCLR_HOME) { my $hipconfig_dir = dirname($0); if (-e "$hipconfig_dir/../lib/bitcode") { - $HIP_ROCclr_HOME = abs_path($hipconfig_dir . "/.."); + $HIP_ROCCLR_HOME = abs_path($hipconfig_dir . "/.."); } else { - $HIP_ROCclr_HOME = $HIP_PATH; # use HIP_PATH + $HIP_ROCCLR_HOME = $HIP_PATH; # use HIP_PATH } } @@ -135,8 +135,8 @@ if ($HIP_COMPILER eq "hcc") { } if ($HIP_COMPILER eq "clang") { # Windows does not have clang at linux default path - if (defined $HIP_ROCclr_HOME and (-e "$HIP_ROCclr_HOME/bin/clang" or -e "$HIP_ROCclr_HOME/bin/clang.exe")) { - $HIP_CLANG_PATH = "$HIP_ROCclr_HOME/bin"; + if (defined $HIP_ROCCLR_HOME and (-e "$HIP_ROCCLR_HOME/bin/clang" or -e "$HIP_ROCCLR_HOME/bin/clang.exe")) { + $HIP_CLANG_PATH = "$HIP_ROCCLR_HOME/bin"; } $HIP_CLANG_VERSION = `$HIP_CLANG_PATH/clang++ --version`; $HIP_CLANG_VERSION=~/.*clang version ([^ ]+).*/; From 1a48abfdd6f344fe6547efe3c54a9ad502859532 Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Thu, 14 May 2020 21:14:05 +0000 Subject: [PATCH 30/30] Fix missing hip_version.h install and package Change-Id: I7182518f8c0dea6e00ffd649d1a29027fdc78b24 --- hipamd/packaging/hip-base.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hipamd/packaging/hip-base.txt b/hipamd/packaging/hip-base.txt index 971b2ce018..0923f0c8fd 100644 --- a/hipamd/packaging/hip-base.txt +++ b/hipamd/packaging/hip-base.txt @@ -4,9 +4,9 @@ project(hip_base) install(DIRECTORY @hip_SOURCE_DIR@/bin DESTINATION . USE_SOURCE_PERMISSIONS) install(DIRECTORY @hip_SOURCE_DIR@/include DESTINATION .) install(FILES @PROJECT_BINARY_DIR@/include/hip/hcc_detail/hip_prof_str.h - DESTINATION include/hip) + DESTINATION include/hip/hcc_detail) install(FILES @PROJECT_BINARY_DIR@/include/hip/hip_version.h - DESTINATION include) + DESTINATION include/hip) install(FILES @PROJECT_BINARY_DIR@/.hipVersion DESTINATION bin) install(PROGRAMS @PROJECT_BINARY_DIR@/lpl DESTINATION bin) install(PROGRAMS @PROJECT_BINARY_DIR@/ca DESTINATION bin)