From ccd4e85fc9a9dd2c16792f899d252b8ef7f693ab Mon Sep 17 00:00:00 2001 From: Ramesh Errabolu Date: Thu, 5 Mar 2020 13:44:22 -0600 Subject: [PATCH] Extend Rocr Visible Devices functionality to include UUIDs Change-Id: Ia2892e4033717556a422fe33dec0294fe2ca9e28 [ROCm/ROCR-Runtime commit: 89f7ef224ca6a8d12d73654c1e1258f6b0c7e175] --- .../runtime/hsa-runtime/CMakeLists.txt | 1 + .../hsa-runtime/core/inc/amd_filter_device.h | 213 ++++++++++++++ .../core/runtime/amd_filter_device.cpp | 272 ++++++++++++++++++ .../hsa-runtime/core/runtime/amd_topology.cpp | 100 +------ .../runtime/hsa-runtime/core/util/flag.h | 1 + .../runtime/hsa-runtime/core/util/utils.h | 19 ++ 6 files changed, 521 insertions(+), 85 deletions(-) create mode 100644 projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_filter_device.h create mode 100644 projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_filter_device.cpp mode change 100755 => 100644 projects/rocr-runtime/runtime/hsa-runtime/core/util/utils.h diff --git a/projects/rocr-runtime/runtime/hsa-runtime/CMakeLists.txt b/projects/rocr-runtime/runtime/hsa-runtime/CMakeLists.txt index 04ea8c3d9d..2d00a6f0c6 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/CMakeLists.txt +++ b/projects/rocr-runtime/runtime/hsa-runtime/CMakeLists.txt @@ -137,6 +137,7 @@ set ( SRCS "core/util/lnx/os_linux.cpp" "core/runtime/amd_loader_context.cpp" "core/runtime/hsa_ven_amd_loader.cpp" "core/runtime/amd_memory_region.cpp" + "core/runtime/amd_filter_device.cpp" "core/runtime/amd_topology.cpp" "core/runtime/default_signal.cpp" "core/runtime/host_queue.cpp" diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_filter_device.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_filter_device.h new file mode 100644 index 0000000000..f7205a0fcd --- /dev/null +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_filter_device.h @@ -0,0 +1,213 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// The 
University of Illinois/NCSA +// Open Source License (NCSA) +// +// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. +// +// Developed by: +// +// AMD Research and AMD HSA Software Development +// +// Advanced Micro Devices, Inc. +// +// www.amd.com +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal with the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimers. +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimers in +// the documentation and/or other materials provided with the distribution. +// - Neither the names of Advanced Micro Devices, Inc, +// nor the names of its contributors may be used to endorse or promote +// products derived from this Software without specific prior written +// permission. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS WITH THE SOFTWARE. 
+// +//////////////////////////////////////////////////////////////////////////////// + +#ifndef HSA_RUNTIME_CORE_INC_AMD_FILTER_DEVICE_H_ +#define HSA_RUNTIME_CORE_INC_AMD_FILTER_DEVICE_H_ + +#include +#include +#include +#include +#include +#include + +#include "hsakmt.h" + +namespace amd { + +// ROCr allows users to filter and reorder various Gpu devices that are +// present on ROCm system. This ability is made available via environment +// variable ROCR_VISIBLE_DEVICES (RVD). Users are allowed to specify a list +// of Gpu Identifiers separated by comma delimiter as the value of this env +// variable. +// +// On a ROCm platform instance, a Gpu device could be identified by its: +// +// Index - Position at which ROCr reports it upon device enumeration +// UUID - A string that is unique and is immutable i.e. tags Gpu +// instance across systems and power cycles. UUID values +// are defined to begin with "GPU-" prefix +// +// @note: Not all Gpu devices will report valid UUID's. For example, +// Only devices from Gfx9 and later will encode valid UUID's. To account +// for this and other reasons, the UUID string "GPU-XX" is defined as +// indicating those devices. Users can still select those Gpu devices +// by using their enumeration index +// +// Users are allowed to select a device by specifying its UUID string in +// full or part. A UUID string that does not uniquely match an agent's +// valid UUID prefix is interpreted as terminating. The UUID string +// "GPU-XX" will not match and therefore will terminate +// +// RVD interpreter treats an empty token list as filtering all devices. 
+// Users can use this mode to report ZERO Gpu devices +// +// RVD interpreter treats a token as Illegal if it can't be evaluated into an +// instance of Device UUID or Enumeration Index +// +// RVD interpreter treats a Legal instance of Enumeration Index as Terminating +// if any ONE of the following conditions apply: +// Value of index lies outside the interval [0 - (numGpuDevices - 1)] +// Value of index maps to a device that has been previously selected +// +// RVD interpreter treats a Legal instance of Device UUID as Terminating +// if any ONE of the following conditions apply: +// Value of UUID is the literal "GPU-XX" +// Value of UUID matches ZERO devices on system +// Value of UUID matches TWO or more devices on system +// Value of UUID maps to a device that has been previously selected +// +// RVD interpreter builds the list of Gpu devices to surface using tokens +// that are Legal and NOT Terminating +// +// Following are some examples of RVD value strings and their interpretation +// on a ROCm system with four Gpu devices. Assume for now the UUID's of the +// four Gpu devices are: +// Gpu-0: "GPU-BABABABABABABABA" +// Gpu-1: "GPU-ABBAABBAABBAABBA" +// Gpu-2: "GPU-BABAABBAABBABABA" +// Gpu-3: "GPU-ABBABABABABAABBA" +// +// Surface ZERO devices +// A1) ROCR_VISIBLE_DEVICES="" +// A2) ROCR_VISIBLE_DEVICES="-1" +// A3) ROCR_VISIBLE_DEVICES="GPU-XX" +// +// Surface Gpu-3 and Gpu-0 devices in that order +// B) ROCR_VISIBLE_DEVICES="3,GPU-BABABABABABABABA,4" +// +// Surface Gpu-1 and Gpu-2 devices in that order +// C) ROCR_VISIBLE_DEVICES="1,GPU-BABAABBAABBABABA,GPU-XX" +// +// Surface Gpu-3 and Gpu-2 devices in that order +// D) ROCR_VISIBLE_DEVICES="3,GPU-BABAABBA,GPU-XX" +// +class RvdFilter { + public: + /// @brief Constructor + RvdFilter() {} + + /// @brief Destructor.
+ ~RvdFilter() {} + + /// @brief Determine if user has specified environment variable + /// ROCR_VISIBLE_DEVICES (RVD) to filter and reorder Gpu devices + /// + /// @return TRUE if user has defined the env RVD + static bool FilterDevices(); + + /// @brief Determine if user has specified environment variable + /// ROCR_VISIBLE_DEVICES (RVD) to filter out all Gpu devices i.e. + /// surface ZERO devices + /// + /// @return TRUE if the value of env RVD is an empty string + bool SelectZeroDevices(); + + /// @brief Builds the list of tokens specified by user to filter + /// and reorder Gpu devices. A token represents either a Gpu's + /// enumeration index or its UUID value. It is possible for the + /// list to have no tokens i.e. user has selected zero devices + void BuildRvdTokenList(); + + /// @brief Build the list of Gpu device UUIDs as enumerated by ROCt + /// + /// @param numNodes Number of ROCm devices present on system, includes + /// both Cpu and Gpu's devices + void BuildDeviceUuidList(uint32_t numNodes); + + /// @brief Build the list of Gpu devices that will be enumerated to user + /// + /// @return Number of Gpu devices to surface upon devices enumeration + uint32_t BuildUsrDeviceList(); + + /// @brief Processes UUID token and returns its enumeration index + /// + /// @param token RVD token encoding a device's UUID value in full or part + /// @return Index of the one device whose UUID begins with token, -1 otherwise + int32_t ProcessUuidToken(const std::string& token); + + /// @brief Get the number of Gpu devices that will be surfaced + /// upon device enumeration + /// + /// @return Number of devices to enumerate including possibly + /// ZERO devices + uint32_t GetUsrDeviceListSize(); + + /// @brief Return the rank of queried Gpu device.
The rank of a device + /// is the position at which it will be surfaced + /// upon device enumeration + /// @param roctIdx Index of queried device in the list of Gpu device UUIDs + /// @return -1 if queried device is not surfaced, else a value in + /// the range [0 - (numGpus - 1)] + int32_t GetUsrDeviceRank(uint32_t roctIdx); + +#ifndef NDEBUG + /// @brief Set debug UUID values to Gpu devices. This is intended to + /// help debug and test RVD module functionality + void SetDeviceUuidList(); + + /// @brief Print the list of Uuids of Gpu devices present on system + void PrintDeviceUuidList(); + + /// @brief Print the surfaced Gpu devices as pairs of user rank and device index + void PrintUsrDeviceList(); + + /// @brief Print the list of tokens specified by user to filter + /// and reorder Gpu devices + void PrintRvdTokenList(); +#endif + + private: + /// @brief List of tokens specified by user to select and reorder Gpu devices + std::vector rvdTokenList_; + + /// @brief Ordered list of ROCt enumerated Gpu device's UUID values + std::vector devUuidList_; + + /// @brief Maps index of a selected Gpu device to its rank in user's order + std::map usrDeviceList_; + +}; // End of class RvdFilter + +} // namespace amd + +#endif // header guard - HSA_RUNTIME_CORE_INC_AMD_FILTER_DEVICE_H_ diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_filter_device.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_filter_device.cpp new file mode 100644 index 0000000000..04087fb111 --- /dev/null +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_filter_device.cpp @@ -0,0 +1,272 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// The University of Illinois/NCSA +// Open Source License (NCSA) +// +// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. +// +// Developed by: +// +// AMD Research and AMD HSA Software Development +// +// Advanced Micro Devices, Inc.
+// +// www.amd.com +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal with the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimers. +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimers in +// the documentation and/or other materials provided with the distribution. +// - Neither the names of Advanced Micro Devices, Inc, +// nor the names of its contributors may be used to endorse or promote +// products derived from this Software without specific prior written +// permission. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS WITH THE SOFTWARE. 
+// +//////////////////////////////////////////////////////////////////////////////// + +#include "core/inc/amd_filter_device.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hsakmt.h" + +#include "core/util/utils.h" +#include "core/inc/runtime.h" +#include "core/inc/amd_cpu_agent.h" +#include "core/inc/amd_gpu_agent.h" +#include "core/inc/amd_memory_region.h" + +namespace amd { + +bool RvdFilter::FilterDevices() { + return core::Runtime::runtime_singleton_->flag().filter_visible_gpus(); +} + +bool RvdFilter::SelectZeroDevices() { + const std::string& envVal = core::Runtime::runtime_singleton_->flag().visible_gpus(); + return envVal.empty(); +} + +void RvdFilter::BuildRvdTokenList() { + // Determine if user has chosen ZERO devices to be surfaced + const std::string& envVal = core::Runtime::runtime_singleton_->flag().visible_gpus(); + if (envVal.empty()) { + return; + } + + // Parse env value into tokens separated by comma (',') delimiter + std::string token; + char separator = ','; + std::stringstream stream(envVal); + while (getline(stream, token, separator)) { + std::transform(token.begin(), token.end(), token.begin(), ::toupper); + token = trim(token); + rvdTokenList_.push_back(token); + } +} + +void RvdFilter::BuildDeviceUuidList(uint32_t numNodes) { + HSAKMT_STATUS status; + HsaNodeProperties props = {0}; + for (HSAuint32 idx = 0; idx < numNodes; idx++) { + // Query for node properties and ignore Cpu devices + status = hsaKmtGetNodeProperties(idx, &props); + if (status != HSAKMT_STATUS_SUCCESS) { + continue; + } + if (props.NumFComputeCores == 0) { + continue; + } + + // For devices whose UUID is zero build a string that + // will not match user provided value + if (props.UniqueID == 0) { + devUuidList_.push_back("Invalid-UUID"); + continue; + } + + // For devices that support valid UUID values capture UUID + // value into a upper case hex string of length 16 including + // leading zeros if necessary + 
std::stringstream stream; + stream << "GPU-" << std::setfill('0') << std::setw(sizeof(uint64_t) * 2) << std::hex + << props.UniqueID; + std::string uuidVal(stream.str()); + std::transform(uuidVal.begin(), uuidVal.end(), uuidVal.begin(), ::toupper); + devUuidList_.push_back(uuidVal); + } +} + +int32_t RvdFilter::ProcessUuidToken(const std::string& token) { + // Reject tokens shorter than "GPU-" plus one digit or longer than a full UUID + uint32_t tokenLen = token.length(); + if ((tokenLen < 5) || (tokenLen > 20)) { + return -1; + } + + // Track the number of devices user token matches + int32_t devIdx = -1; + int32_t compareVal = -1; + uint32_t numGpus = devUuidList_.size(); + for (uint32_t idx = 0; idx < numGpus; idx++) { + uint32_t uuidLen = devUuidList_[idx].length(); + + // Token longer than this device's UUID cannot match it, try next device + if (tokenLen > uuidLen) { + compareVal = -1; + continue; + } + + // Token could match as leading substring (prefix) of device UUID + compareVal = token.compare(0, tokenLen, devUuidList_[idx], 0, tokenLen); + + // Token matching a second device is ambiguous and is rejected + if (compareVal == 0) { + if (devIdx != -1) { + return -1; + } + devIdx = idx; + } + } + + // Return value includes possibility of both + // finding or not finding a device + return devIdx; +} + +uint32_t RvdFilter::BuildUsrDeviceList() { + // Get number of Gpu devices and user specified tokens + uint32_t numGpus = devUuidList_.size(); + uint32_t loopCnt = std::min(numGpus, uint32_t(rvdTokenList_.size())); + + // Evaluate tokens into device index or UUID values + int32_t usrIdx = 0; + int32_t devIdx = -1; + for (uint32_t idx = 0; idx < loopCnt; idx++) { + // User token to be evaluated as UUID or device index + std::string& token = rvdTokenList_[idx]; + + // Token encodes a UUID value or is empty, an empty token is rejected as a UUID + if (token.empty() || (token.at(0) == 'G')) { + devIdx = ProcessUuidToken(token); + if (devIdx == -1) { + return usrDeviceList_.size(); + } + + // Token encodes device index + } else { + char* end = nullptr; + const char* tmp = token.c_str(); + devIdx = std::strtol(tmp,
&end, 0); + if (*end != '\0') { + return usrDeviceList_.size(); + } + } + + // Rvd Token evaluates to wrong device index + if ((devIdx < 0) || (devIdx >= numGpus)) { + return usrDeviceList_.size(); + } + + // Determine if device index is previously seen + // Such indices are interpreted as terminators + bool exists = (usrDeviceList_.find(devIdx) != usrDeviceList_.end()); + if (exists) { + return usrDeviceList_.size(); + } + + // Add index to the list of devices that will be + // surfaced upon device enumeration + usrDeviceList_[devIdx] = usrIdx++; + } + + return usrDeviceList_.size(); +} + +uint32_t RvdFilter::GetUsrDeviceListSize() { return usrDeviceList_.size(); } + +int32_t RvdFilter::GetUsrDeviceRank(uint32_t roctIdx) { + const auto& it = usrDeviceList_.find(roctIdx); + if (it != usrDeviceList_.end()) { + return it->second; + } + return -1; +} + +#ifndef NDEBUG +void RvdFilter::SetDeviceUuidList() { + uint64_t dbgUuid[] = {0xBABABABABABABABA, 0xBABABABABABAABBA, 0xBABABABAABBAABBA, + 0xBABAABBAABBAABBA, 0xABBAABBAABBAABBA, 0xABBAABBAABBABABA, + 0xABBAABBABABABABA, 0xABBABABABABABABA}; + + // Override Uuid values of the leading devices, at most one per dbgUuid entry + uint32_t numGpus = devUuidList_.size(); + uint32_t numUuids = (sizeof(dbgUuid) / sizeof(uint64_t)); + for (uint32_t idx = 0; (idx < numGpus && (idx < numUuids)); idx++) { + std::stringstream stream; + + // For devices whose UUID is zero store the string used by BuildDeviceUuidList + if (dbgUuid[idx] == 0) { + devUuidList_[idx] = "Invalid-UUID"; + continue; + } + + // For devices that support valid UUID values + stream << "GPU-" << std::setfill('0') << std::setw(sizeof(uint64_t) * 2) << std::hex + << dbgUuid[idx]; + std::string uuidVal(stream.str()); + std::transform(uuidVal.begin(), uuidVal.end(), uuidVal.begin(), ::toupper); + devUuidList_[idx] = uuidVal; + } +} + +void RvdFilter::PrintDeviceUuidList() { + uint32_t numGpus = devUuidList_.size(); + for (uint32_t idx = 0; idx < numGpus; idx++) { + std::cout << "Dev[" << idx << "]: " << devUuidList_[idx]; + std::cout << std::endl
<< std::flush; + } +} + +void RvdFilter::PrintUsrDeviceList() { + // Flip the map values as value indicates surface rank + for (auto const& elem : usrDeviceList_) { + std::cout << "UsrDev[" << elem.second << "]: " << elem.first; + std::cout << std::endl << std::flush; + } +} + +void RvdFilter::PrintRvdTokenList() { + uint32_t numTokens = rvdTokenList_.size(); + for (uint32_t idx = 0; idx < numTokens; idx++) { + std::cout << "Token[" << idx << "]: " << rvdTokenList_[idx]; + std::cout << std::endl << std::flush; + } +} +#endif + +} // namespace amd diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_topology.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_topology.cpp index 3063d55fb2..4fe2fac4fc 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_topology.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_topology.cpp @@ -41,6 +41,7 @@ //////////////////////////////////////////////////////////////////////////////// #include "core/inc/amd_topology.h" +#include "core/inc/amd_filter_device.h" #include #include @@ -66,81 +67,6 @@ namespace amd { static const uint kKfdVersionMajor = 0; static const uint kKfdVersionMinor = 99; -#ifndef NDEBUG -static bool PrintUsrGpuMap(std::map& gpu_usr_map) { - (void)PrintUsrGpuMap; // Suppress unused symbol warning. - std::map::iterator it; - for (it = gpu_usr_map.begin(); it != gpu_usr_map.end(); it++) { - int32_t usrIdx = it->second; - uint32_t kfdIdx = it->first; - std::cout << "KfdIdx: " << kfdIdx << " @ UsrIdx: " << usrIdx << std::endl; - } - return true; -} -#endif - -/** - * Determines if user has defined the env that indicates which - * subset of Gpu's are desired to be surfaced. If defined the - * set of Gpu's are captured into a map of Gpu index and - * - * @return true if env is defined i.e. has some value including - * empty string, false otherwise. It is possible to have zero - * devices surfaced even when env is not blank. 
- */ -static bool MapUsrGpuList(int32_t numNodes, std::map& gpu_usr_map) { - bool filter = core::Runtime::runtime_singleton_->flag().filter_visible_gpus(); - if (filter == false) { - return false; - } - - const std::string& env_value = core::Runtime::runtime_singleton_->flag().visible_gpus(); - if (env_value.empty()) { - return true; - } - - // Capture the env value string as a parsable stream - std::stringstream stream(env_value); - - // Read stream until there are no more tokens - int32_t usrIdx = 0; - int32_t token = 0x11231926; - while (!stream.eof()) { - // Read the option value - stream >> token; - if (stream.fail()) { - return true; - } - - // Stop processing input tokens if invalid index is seen - // A value that is less than zero or greater than the - // number of Numa nodes is considered invalid - if ((token < 0) || (token >= numNodes)) { - return true; - } - - // Determine if current value has been seen before - // @note: Currently we are interpreting a repeat as - // an invalid index i.e. is equal to -1 - bool exists = gpu_usr_map.find(token) != gpu_usr_map.end(); - if (exists) { - return true; - } - - // Update Gpu User map table - gpu_usr_map[token] = usrIdx++; - - // Ignore the delimiter - if (stream.peek() == ',') { - stream.ignore(); - } else { - return true; - } - } - - return true; -} - CpuAgent* DiscoverCpu(HSAuint32 node_id, HsaNodeProperties& node_prop) { if (node_prop.NumCPUCores == 0) { return nullptr; @@ -286,15 +212,18 @@ void BuildTopology() { core::Runtime::runtime_singleton_->SetLinkCount(props.NumNodes); - // Determine and process user's request to surface - // a subset of Gpu devices + // Query if env ROCR_VISIBLE_DEVICES is defined. 
If defined + // determine number and order of GPU devices to be surfaced + RvdFilter rvdFilter; int32_t invalidIdx = -1; + uint32_t visibleCnt = 0; std::vector gpu_usr_list; - std::map gpu_usr_map; - bool filter = MapUsrGpuList(props.NumNodes, gpu_usr_map); - int32_t list_sz = gpu_usr_map.size(); + bool filter = RvdFilter::FilterDevices(); if (filter) { - for (int32_t idx = 0; idx < list_sz; idx++) { + rvdFilter.BuildRvdTokenList(); + rvdFilter.BuildDeviceUuidList(props.NumNodes); + visibleCnt = rvdFilter.BuildUsrDeviceList(); + for (int32_t idx = 0; idx < visibleCnt; idx++) { gpu_usr_list.push_back(invalidIdx); } } @@ -307,7 +236,7 @@ void BuildTopology() { continue; } - // Instantiate a Cpu/Apu device + // Instantiate a Cpu device const CpuAgent* cpu = DiscoverCpu(node_id, node_prop); assert(((node_prop.NumCPUCores == 0) || (cpu != nullptr)) && "CPU device failed discovery."); @@ -316,9 +245,9 @@ void BuildTopology() { // visible list, continue if not found if (node_prop.NumFComputeCores != 0) { if (filter) { - const auto& it = gpu_usr_map.find(kfdIdx); - if (it != gpu_usr_map.end()) { - gpu_usr_list[it->second] = node_id; + int32_t devRank = rvdFilter.GetUsrDeviceRank(kfdIdx); + if (devRank != (-1)) { + gpu_usr_list[devRank] = node_id; } } else { gpu_usr_list.push_back(node_id); @@ -333,6 +262,7 @@ void BuildTopology() { RegisterLinkInfo(node_id, node_prop.NumIOLinks); } + // Instantiate ROCr objects to encapsulate Gpu devices SurfaceGpuList(gpu_usr_list); } diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/util/flag.h b/projects/rocr-runtime/runtime/hsa-runtime/core/util/flag.h index a5c0ad6031..8f4a474324 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/util/flag.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/util/flag.h @@ -144,6 +144,7 @@ class Flag { std::string enable_sdma() const { return enable_sdma_; } std::string visible_gpus() const { return visible_gpus_; } + bool filter_visible_gpus() const { return filter_visible_gpus_; 
} uint32_t max_queues() const { return max_queues_; } diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/util/utils.h b/projects/rocr-runtime/runtime/hsa-runtime/core/util/utils.h old mode 100755 new mode 100644 index f7f09e9d7c..fbe7e4760a --- a/projects/rocr-runtime/runtime/hsa-runtime/core/util/utils.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/util/utils.h @@ -49,6 +49,9 @@ #include "stddef.h" #include "stdlib.h" #include +#include +#include +#include typedef unsigned int uint; typedef uint64_t uint64; @@ -307,6 +310,22 @@ static __forceinline uint64_t NextPow2(uint64_t value) { static __forceinline bool strIsEmpty(const char* str) noexcept { return str[0] == '\0'; } +static __forceinline std::string& ltrim(std::string& s) { + auto it = std::find_if(s.begin(), s.end(), + [](char c) { return !std::isspace(c, std::locale::classic()); }); + s.erase(s.begin(), it); + return s; +} + +static __forceinline std::string& rtrim(std::string& s) { + auto it = std::find_if(s.rbegin(), s.rend(), + [](char c) { return !std::isspace(c, std::locale::classic()); }); + s.erase(it.base(), s.end()); + return s; +} + +static __forceinline std::string& trim(std::string& s) { return ltrim(rtrim(s)); } + #include "atomic_helpers.h" #endif // HSA_RUNTIME_CORE_UTIL_UTIIS_H_