Extend Rocr Visible Devices functionality to include UUIDs
Change-Id: Ia2892e4033717556a422fe33dec0294fe2ca9e28
[ROCm/ROCR-Runtime commit: 89f7ef224c]
This commit is contained in:
@@ -137,6 +137,7 @@ set ( SRCS "core/util/lnx/os_linux.cpp"
|
||||
"core/runtime/amd_loader_context.cpp"
|
||||
"core/runtime/hsa_ven_amd_loader.cpp"
|
||||
"core/runtime/amd_memory_region.cpp"
|
||||
"core/runtime/amd_filter_device.cpp"
|
||||
"core/runtime/amd_topology.cpp"
|
||||
"core/runtime/default_signal.cpp"
|
||||
"core/runtime/host_queue.cpp"
|
||||
|
||||
@@ -0,0 +1,213 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef HSA_RUNTIME_CORE_INC_AMD_FILTER_DEVICE_H_
|
||||
#define HSA_RUNTIME_CORE_INC_AMD_FILTER_DEVICE_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
|
||||
#include "hsakmt.h"
|
||||
|
||||
namespace amd {
|
||||
|
||||
// ROCr allows users to filter and reorder various Gpu devices that are
|
||||
// present on ROCm system. This ability is made available via environment
|
||||
// variable ROCR_VISIBLE_DEVICES (RVD). Users are allowed to specify a list
|
||||
// of Gpu Identifiers separated by comma delimiter as the value of this env
|
||||
// variable.
|
||||
//
|
||||
// On a ROCm platform instance, a Gpu device could be identified by its:
|
||||
//
|
||||
// Index - Position at which ROCr reports it upon device enumeration
|
||||
// UUID - A string that is unique and is immutable i.e. tags Gpu
|
||||
// instance across systems and power cycles. UUID values
|
||||
// are defined to begin with "GPU-" prefix
|
||||
//
|
||||
// @note: Not all Gpu devices will report valid UUID's. For example,
|
||||
// Only devices from Gfx9 and later will encode valid UUID's. To account
|
||||
// for this and other reasons, the UUID string "GPU-XX" is defined as
|
||||
// indicating those devices. Users can still select those Gpu devices
|
||||
// by using their enumeration index
|
||||
//
|
||||
// Users are allowed to select a device by specifying its UUID string in
|
||||
// full or part. A UUID string that does not uniquely match an agent's
|
||||
// valid UUID prefix is interpreted as terminating. The UUID string
|
||||
// "GPU-XX" will not match and therefore will terminate
|
||||
//
|
||||
// RVD interpreter treats an empty token list as filtering all devices.
|
||||
// Users can use this mode to report ZERO Gpu devices
|
||||
//
|
||||
// RVD interpreter treats a token as Illegal if can't be evaluated into an
|
||||
// instance of Device UUID or Enumeration Index
|
||||
//
|
||||
// RVD interpreter treats a Legal instance of Enumeration Index as Terminating
|
||||
// if any ONE of the following conditions apply:
|
||||
// Value of index lies outside the interval [0 - (numGpuDevices - 1)]
|
||||
// Value of index maps to a device that has been previously selected
|
||||
//
|
||||
// RVD interpreter treats a Legal instance of Device UUID as Terminating
|
||||
// if any ONE of the following conditions apply:
|
||||
// Value of UUID is the literal "GPU-XX"
|
||||
// Value of UUID matches ZERO devices on system
|
||||
// Value of UUID matches TWO or more devices on system
|
||||
// Value of UUID maps to a device that has been previously selected
|
||||
//
|
||||
// RVD interpreter builds the list of Gpu devices to surface using tokens
|
||||
// that are Legal and NOT Terminating
|
||||
//
|
||||
// Following are some examples of RVD value strings and their interpretation
|
||||
// on a ROCm system with four Gpu devices. Assume for now the UUID's of the
|
||||
// four Gpu devices are:
|
||||
// Gpu-0: "GPU-BABABABABABABABA"
|
||||
// Gpu-1: "GPU-ABBAABBAABBAABBA"
|
||||
// Gpu-2: "GPU-BABAABBAABBABABA"
|
||||
// Gpu-3: "GPU-ABBABABABABAABBA"
|
||||
//
|
||||
// Surface ZERO devices
|
||||
// A1) ROCR_VISIBLE_DEVICES=""
|
||||
// A2) ROCR_VISIBLE_DEVICES="-1"
|
||||
// A3) ROCR_VISIBLE_DEVICES="GPU-XX"
|
||||
//
|
||||
// Surface Gpu-3 and Gpu-0 devices in that order
|
||||
// B) ROCR_VISIBLE_DEVICES="3,GPU-BABABABABABABABA,4"
|
||||
//
|
||||
// Surface Gpu-1 and Gpu-2 devices in that order
|
||||
// C) ROCR_VISIBLE_DEVICES="1,GPU-BABAABBAABBABABA,GPU-XX"
|
||||
//
|
||||
// Surface Gpu-3 and Gpu-2 devices in that order
|
||||
// D) ROCR_VISIBLE_DEVICES="3,GPU-BABAABBA,GPU-XX"
|
||||
//
|
||||
class RvdFilter {
 public:
  /// @brief Constructor. The token, UUID and user device lists start empty.
  RvdFilter() = default;

  /// @brief Destructor.
  ~RvdFilter() = default;

  /// @brief Query whether the user has requested filtering and reordering
  /// of Gpu devices via environment variable ROCR_VISIBLE_DEVICES (RVD)
  ///
  /// @return true if user has defined the env RVD, false otherwise
  static bool FilterDevices();

  /// @brief Query whether the value of RVD requests that all Gpu devices
  /// be filtered out i.e. ZERO devices are to be surfaced
  ///
  /// @return true if the value of RVD is the empty string
  bool SelectZeroDevices();

  /// @brief Split the value of RVD on the comma delimiter into the list
  /// of user tokens. Every token is meant to name a Gpu device either by
  /// its enumeration index or by its UUID. The list remains empty when
  /// the value of RVD is empty i.e. user has selected zero devices
  void BuildRvdTokenList();

  /// @brief Collect the UUID strings of Gpu devices in the order they
  /// are enumerated by ROCt
  ///
  /// @param numNodes Number of ROCm devices present on system, includes
  /// both Cpu and Gpu devices
  void BuildDeviceUuidList(uint32_t numNodes);

  /// @brief Evaluate the user tokens, in order, into the list of Gpu
  /// devices that will be enumerated to user. Evaluation stops at the
  /// first token that is illegal or terminating
  ///
  /// @return Number of Gpu devices to surface upon device enumeration
  uint32_t BuildUsrDeviceList();

  /// @brief Map a UUID token onto the enumeration index of the one Gpu
  /// device whose UUID string begins with that token
  ///
  /// @param token RVD token encoding a device's UUID value, in full or part
  ///
  /// @return Enumeration index of the matching device, -1 if the token
  /// matches no device or matches more than one device
  int32_t ProcessUuidToken(const std::string& token);

  /// @brief Get the number of Gpu devices that will be surfaced upon
  /// device enumeration
  ///
  /// @return Number of devices to enumerate, possibly ZERO
  uint32_t GetUsrDeviceListSize();

  /// @brief Get the rank i.e. the position at which the queried Gpu
  /// device will be surfaced upon device enumeration
  ///
  /// @param roctIdx Index of the Gpu device as enumerated by ROCt
  ///
  /// @return -1 if queried device is not surfaced, else the zero based
  /// position of the device among the surfaced devices
  int32_t GetUsrDeviceRank(uint32_t roctIdx);

#ifndef NDEBUG
  /// @brief Overwrite the UUID values of Gpu devices with debug values.
  /// This is intended to help debug and test RVD module functionality
  void SetDeviceUuidList();

  /// @brief Print the UUID of every Gpu device present on system
  void PrintDeviceUuidList();

  /// @brief Print the Gpu devices selected by user along with their rank
  void PrintUsrDeviceList();

  /// @brief Print the tokens specified by user to filter and reorder
  /// Gpu devices
  void PrintRvdTokenList();
#endif

 private:
  /// @brief Tokens specified by user to select and reorder Gpu devices
  std::vector<std::string> rvdTokenList_;

  /// @brief UUID strings of Gpu devices, ordered as enumerated by ROCt
  std::vector<std::string> devUuidList_;

  /// @brief Maps the ROCt enumeration index of a selected Gpu device to
  /// the rank at which ROCr will surface it
  std::map<uint32_t, int32_t> usrDeviceList_;

};  // End of class RvdFilter
|
||||
|
||||
} // namespace amd
|
||||
|
||||
#endif // header guard - HSA_RUNTIME_CORE_INC_AMD_FILTER_DEVICE_H_
|
||||
@@ -0,0 +1,272 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "core/inc/amd_filter_device.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <climits>
|
||||
|
||||
#include "hsakmt.h"
|
||||
|
||||
#include "core/util/utils.h"
|
||||
#include "core/inc/runtime.h"
|
||||
#include "core/inc/amd_cpu_agent.h"
|
||||
#include "core/inc/amd_gpu_agent.h"
|
||||
#include "core/inc/amd_memory_region.h"
|
||||
|
||||
namespace amd {
|
||||
|
||||
bool RvdFilter::FilterDevices() {
|
||||
return core::Runtime::runtime_singleton_->flag().filter_visible_gpus();
|
||||
}
|
||||
|
||||
bool RvdFilter::SelectZeroDevices() {
|
||||
const std::string& envVal = core::Runtime::runtime_singleton_->flag().visible_gpus();
|
||||
return envVal.empty();
|
||||
}
|
||||
|
||||
void RvdFilter::BuildRvdTokenList() {
|
||||
// Determine if user has chosen ZERO devices to be surfaced
|
||||
const std::string& envVal = core::Runtime::runtime_singleton_->flag().visible_gpus();
|
||||
if (envVal.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Parse env value into tokens separated by comma (',') delimiter
|
||||
std::string token;
|
||||
char separator = ',';
|
||||
std::stringstream stream(envVal);
|
||||
while (getline(stream, token, separator)) {
|
||||
std::transform(token.begin(), token.end(), token.begin(), ::toupper);
|
||||
token = trim(token);
|
||||
rvdTokenList_.push_back(token);
|
||||
}
|
||||
}
|
||||
|
||||
void RvdFilter::BuildDeviceUuidList(uint32_t numNodes) {
|
||||
HSAKMT_STATUS status;
|
||||
HsaNodeProperties props = {0};
|
||||
for (HSAuint32 idx = 0; idx < numNodes; idx++) {
|
||||
// Query for node properties and ignore Cpu devices
|
||||
status = hsaKmtGetNodeProperties(idx, &props);
|
||||
if (status != HSAKMT_STATUS_SUCCESS) {
|
||||
continue;
|
||||
}
|
||||
if (props.NumFComputeCores == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// For devices whose UUID is zero build a string that
|
||||
// will not match user provided value
|
||||
if (props.UniqueID == 0) {
|
||||
devUuidList_.push_back("Invalid-UUID");
|
||||
continue;
|
||||
}
|
||||
|
||||
// For devices that support valid UUID values capture UUID
|
||||
// value into a upper case hex string of length 16 including
|
||||
// leading zeros if necessary
|
||||
std::stringstream stream;
|
||||
stream << "GPU-" << std::setfill('0') << std::setw(sizeof(uint64_t) * 2) << std::hex
|
||||
<< props.UniqueID;
|
||||
std::string uuidVal(stream.str());
|
||||
std::transform(uuidVal.begin(), uuidVal.end(), uuidVal.begin(), ::toupper);
|
||||
devUuidList_.push_back(uuidVal);
|
||||
}
|
||||
}
|
||||
|
||||
int32_t RvdFilter::ProcessUuidToken(const std::string& token) {
|
||||
// Determine if token exceeds max length of a UUID string
|
||||
uint32_t tokenLen = token.length();
|
||||
if ((tokenLen < 5) || (tokenLen > 20)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Track the number of devices user token matches
|
||||
int32_t devIdx = -1;
|
||||
int32_t compareVal = -1;
|
||||
uint32_t numGpus = devUuidList_.size();
|
||||
for (uint32_t idx = 0; idx < numGpus; idx++) {
|
||||
uint32_t uuidLen = devUuidList_[idx].length();
|
||||
|
||||
// Token could match UUID of another device
|
||||
if (tokenLen > uuidLen) {
|
||||
compareVal = -1;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Token could match as substring of device UUID
|
||||
compareVal = token.compare(0, tokenLen, devUuidList_[idx], 0, tokenLen);
|
||||
|
||||
// Check if user Uuid matches with ROCt Uuid
|
||||
if (compareVal == 0) {
|
||||
if (devIdx != -1) {
|
||||
return -1;
|
||||
}
|
||||
devIdx = idx;
|
||||
}
|
||||
}
|
||||
|
||||
// Return value includes possibility of both
|
||||
// finding or not finding a device
|
||||
return devIdx;
|
||||
}
|
||||
|
||||
uint32_t RvdFilter::BuildUsrDeviceList() {
|
||||
// Get number of Gpu devices and user specified tokens
|
||||
uint32_t numGpus = devUuidList_.size();
|
||||
uint32_t loopCnt = std::min(numGpus, uint32_t(rvdTokenList_.size()));
|
||||
|
||||
// Evaluate tokens into device index or UUID values
|
||||
int32_t usrIdx = 0;
|
||||
int32_t devIdx = -1;
|
||||
for (uint32_t idx = 0; idx < loopCnt; idx++) {
|
||||
// User token to be evaluated as UUID or device index
|
||||
std::string& token = rvdTokenList_[idx];
|
||||
|
||||
// Token encodes a UUID valaue
|
||||
if (token.at(0) == 'G') {
|
||||
devIdx = ProcessUuidToken(token);
|
||||
if (devIdx == -1) {
|
||||
return usrDeviceList_.size();
|
||||
}
|
||||
|
||||
// Token encodes device index
|
||||
} else {
|
||||
char* end = nullptr;
|
||||
const char* tmp = token.c_str();
|
||||
devIdx = std::strtol(tmp, &end, 0);
|
||||
if (*end != '\0') {
|
||||
return usrDeviceList_.size();
|
||||
}
|
||||
}
|
||||
|
||||
// Rvd Token evaluates to wrong device index
|
||||
if ((devIdx < 0) || (devIdx >= numGpus)) {
|
||||
return usrDeviceList_.size();
|
||||
}
|
||||
|
||||
// Determine if device index is previously seen
|
||||
// Such indices are interpreted as terminators
|
||||
bool exists = (usrDeviceList_.find(devIdx) != usrDeviceList_.end());
|
||||
if (exists) {
|
||||
return usrDeviceList_.size();
|
||||
}
|
||||
|
||||
// Add index to the list of devices that will be
|
||||
// surfaced upon device enumeration
|
||||
usrDeviceList_[devIdx] = usrIdx++;
|
||||
}
|
||||
|
||||
return usrDeviceList_.size();
|
||||
}
|
||||
|
||||
uint32_t RvdFilter::GetUsrDeviceListSize() {
  // Number of Gpu devices selected for enumeration, possibly zero
  const uint32_t surfacedCnt = usrDeviceList_.size();
  return surfacedCnt;
}
|
||||
|
||||
int32_t RvdFilter::GetUsrDeviceRank(uint32_t roctIdx) {
|
||||
const auto& it = usrDeviceList_.find(roctIdx);
|
||||
if (it != usrDeviceList_.end()) {
|
||||
return it->second;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
void RvdFilter::SetDeviceUuidList() {
|
||||
uint64_t dbgUuid[] = {0xBABABABABABABABA, 0xBABABABABABAABBA, 0xBABABABAABBAABBA,
|
||||
0xBABAABBAABBAABBA, 0xABBAABBAABBAABBA, 0xABBAABBAABBABABA,
|
||||
0xABBAABBABABABABA, 0xABBABABABABABABA};
|
||||
|
||||
// Override or Set Uuid values for the first four devices
|
||||
uint32_t numGpus = devUuidList_.size();
|
||||
uint32_t numUuids = (sizeof(dbgUuid) / sizeof(uint64_t));
|
||||
for (uint32_t idx = 0; (idx < numGpus && (idx < numUuids)); idx++) {
|
||||
std::stringstream stream;
|
||||
|
||||
// For devices whose UUID is zero
|
||||
if (dbgUuid[idx] == 0) {
|
||||
stream << "GPU-XX";
|
||||
continue;
|
||||
}
|
||||
|
||||
// For devices that support valid UUID values
|
||||
stream << "GPU-" << std::setfill('0') << std::setw(sizeof(uint64_t) * 2) << std::hex
|
||||
<< dbgUuid[idx];
|
||||
std::string uuidVal(stream.str());
|
||||
std::transform(uuidVal.begin(), uuidVal.end(), uuidVal.begin(), ::toupper);
|
||||
devUuidList_[idx] = uuidVal;
|
||||
}
|
||||
}
|
||||
|
||||
void RvdFilter::PrintDeviceUuidList() {
|
||||
uint32_t numGpus = devUuidList_.size();
|
||||
for (uint32_t idx = 0; idx < numGpus; idx++) {
|
||||
std::cout << "Dev[" << idx << "]: " << devUuidList_[idx];
|
||||
std::cout << std::endl << std::flush;
|
||||
}
|
||||
}
|
||||
|
||||
void RvdFilter::PrintUsrDeviceList() {
|
||||
// Flip the map values as value indicates surface rank
|
||||
for (auto const& elem : usrDeviceList_) {
|
||||
std::cout << "UsrDev[" << elem.second << "]: " << elem.first;
|
||||
std::cout << std::endl << std::flush;
|
||||
}
|
||||
}
|
||||
|
||||
void RvdFilter::PrintRvdTokenList() {
|
||||
uint32_t numTokens = rvdTokenList_.size();
|
||||
for (uint32_t idx = 0; idx < numTokens; idx++) {
|
||||
std::cout << "Token[" << idx << "]: " << rvdTokenList_[idx];
|
||||
std::cout << std::endl << std::flush;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
} // namespace amd
|
||||
@@ -41,6 +41,7 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "core/inc/amd_topology.h"
|
||||
#include "core/inc/amd_filter_device.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
@@ -66,81 +67,6 @@ namespace amd {
|
||||
// Major and minor numbers of a KFD version, going by the names.
// NOTE(review): how these are compared is not visible here - confirm
static constexpr uint kKfdVersionMajor = 0;
static constexpr uint kKfdVersionMinor = 99;
|
||||
|
||||
#ifndef NDEBUG
|
||||
// Prints one line per entry of the map: the Kfd index (key) followed by
// the user index (value) it maps to. Always returns true.
static bool PrintUsrGpuMap(std::map<uint32_t, int32_t>& gpu_usr_map) {
  (void)PrintUsrGpuMap;  // Suppress unused symbol warning.
  for (const auto& entry : gpu_usr_map) {
    std::cout << "KfdIdx: " << entry.first << " @ UsrIdx: " << entry.second << std::endl;
  }
  return true;
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Determines if user has defined the env that indicates which
|
||||
* subset of Gpu's are desired to be surfaced. If defined the
|
||||
* set of Gpu's are captured into a map of Gpu index and
|
||||
*
|
||||
* @return true if env is defined i.e. has some value including
|
||||
* empty string, false otherwise. It is possible to have zero
|
||||
* devices surfaced even when env is not blank.
|
||||
*/
|
||||
static bool MapUsrGpuList(int32_t numNodes, std::map<uint32_t, int32_t>& gpu_usr_map) {
|
||||
bool filter = core::Runtime::runtime_singleton_->flag().filter_visible_gpus();
|
||||
if (filter == false) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const std::string& env_value = core::Runtime::runtime_singleton_->flag().visible_gpus();
|
||||
if (env_value.empty()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Capture the env value string as a parsable stream
|
||||
std::stringstream stream(env_value);
|
||||
|
||||
// Read stream until there are no more tokens
|
||||
int32_t usrIdx = 0;
|
||||
int32_t token = 0x11231926;
|
||||
while (!stream.eof()) {
|
||||
// Read the option value
|
||||
stream >> token;
|
||||
if (stream.fail()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Stop processing input tokens if invalid index is seen
|
||||
// A value that is less than zero or greater than the
|
||||
// number of Numa nodes is considered invalid
|
||||
if ((token < 0) || (token >= numNodes)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Determine if current value has been seen before
|
||||
// @note: Currently we are interpreting a repeat as
|
||||
// an invalid index i.e. is equal to -1
|
||||
bool exists = gpu_usr_map.find(token) != gpu_usr_map.end();
|
||||
if (exists) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Update Gpu User map table
|
||||
gpu_usr_map[token] = usrIdx++;
|
||||
|
||||
// Ignore the delimiter
|
||||
if (stream.peek() == ',') {
|
||||
stream.ignore();
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
CpuAgent* DiscoverCpu(HSAuint32 node_id, HsaNodeProperties& node_prop) {
|
||||
if (node_prop.NumCPUCores == 0) {
|
||||
return nullptr;
|
||||
@@ -286,15 +212,18 @@ void BuildTopology() {
|
||||
|
||||
core::Runtime::runtime_singleton_->SetLinkCount(props.NumNodes);
|
||||
|
||||
// Determine and process user's request to surface
|
||||
// a subset of Gpu devices
|
||||
// Query if env ROCR_VISIBLE_DEVICES is defined. If defined
|
||||
// determine number and order of GPU devices to be surfaced
|
||||
RvdFilter rvdFilter;
|
||||
int32_t invalidIdx = -1;
|
||||
uint32_t visibleCnt = 0;
|
||||
std::vector<int32_t> gpu_usr_list;
|
||||
std::map<uint32_t, int32_t> gpu_usr_map;
|
||||
bool filter = MapUsrGpuList(props.NumNodes, gpu_usr_map);
|
||||
int32_t list_sz = gpu_usr_map.size();
|
||||
bool filter = RvdFilter::FilterDevices();
|
||||
if (filter) {
|
||||
for (int32_t idx = 0; idx < list_sz; idx++) {
|
||||
rvdFilter.BuildRvdTokenList();
|
||||
rvdFilter.BuildDeviceUuidList(props.NumNodes);
|
||||
visibleCnt = rvdFilter.BuildUsrDeviceList();
|
||||
for (int32_t idx = 0; idx < visibleCnt; idx++) {
|
||||
gpu_usr_list.push_back(invalidIdx);
|
||||
}
|
||||
}
|
||||
@@ -307,7 +236,7 @@ void BuildTopology() {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Instantiate a Cpu/Apu device
|
||||
// Instantiate a Cpu device
|
||||
const CpuAgent* cpu = DiscoverCpu(node_id, node_prop);
|
||||
assert(((node_prop.NumCPUCores == 0) || (cpu != nullptr)) && "CPU device failed discovery.");
|
||||
|
||||
@@ -316,9 +245,9 @@ void BuildTopology() {
|
||||
// visible list, continue if not found
|
||||
if (node_prop.NumFComputeCores != 0) {
|
||||
if (filter) {
|
||||
const auto& it = gpu_usr_map.find(kfdIdx);
|
||||
if (it != gpu_usr_map.end()) {
|
||||
gpu_usr_list[it->second] = node_id;
|
||||
int32_t devRank = rvdFilter.GetUsrDeviceRank(kfdIdx);
|
||||
if (devRank != (-1)) {
|
||||
gpu_usr_list[devRank] = node_id;
|
||||
}
|
||||
} else {
|
||||
gpu_usr_list.push_back(node_id);
|
||||
@@ -333,6 +262,7 @@ void BuildTopology() {
|
||||
RegisterLinkInfo(node_id, node_prop.NumIOLinks);
|
||||
}
|
||||
|
||||
// Instantiate ROCr objects to encapsulate Gpu devices
|
||||
SurfaceGpuList(gpu_usr_list);
|
||||
}
|
||||
|
||||
|
||||
@@ -144,6 +144,7 @@ class Flag {
|
||||
// Returns a copy of the enable_sdma_ member string.
std::string enable_sdma() const { return enable_sdma_; }
|
||||
|
||||
// Returns a copy of the visible_gpus_ member string.
std::string visible_gpus() const { return visible_gpus_; }
|
||||
|
||||
// Returns the value of the filter_visible_gpus_ member.
bool filter_visible_gpus() const { return filter_visible_gpus_; }
|
||||
|
||||
// Returns the value of the max_queues_ member.
uint32_t max_queues() const { return max_queues_; }
|
||||
|
||||
Executable → Regular
+19
@@ -49,6 +49,9 @@
|
||||
#include "stddef.h"
|
||||
#include "stdlib.h"
|
||||
#include <assert.h>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
|
||||
// Short aliases for unsigned integer types
using uint = unsigned int;
using uint64 = uint64_t;
|
||||
@@ -307,6 +310,22 @@ static __forceinline uint64_t NextPow2(uint64_t value) {
|
||||
|
||||
// Returns true if the C string consists of the terminator only.
// The pointer is dereferenced without a null check.
static __forceinline bool strIsEmpty(const char* str) noexcept {
  return *str == '\0';
}
|
||||
|
||||
// Removes leading white space, as classified by the classic "C" locale,
// from the string in place and returns a reference to that same string.
static __forceinline std::string& ltrim(std::string& s) {
  const std::locale& cLocale = std::locale::classic();
  std::string::size_type lead = 0;
  while ((lead < s.size()) && std::isspace<char>(s[lead], cLocale)) {
    ++lead;
  }
  s.erase(0, lead);
  return s;
}
|
||||
|
||||
// Removes trailing white space, as classified by the classic "C" locale,
// from the string in place and returns a reference to that same string.
static __forceinline std::string& rtrim(std::string& s) {
  const std::locale& cLocale = std::locale::classic();
  std::string::size_type keep = s.size();
  while ((keep > 0) && std::isspace<char>(s[keep - 1], cLocale)) {
    --keep;
  }
  s.erase(keep);
  return s;
}
|
||||
|
||||
// Removes white space from both ends of the string in place, trailing
// end first, and returns a reference to that same string.
static __forceinline std::string& trim(std::string& s) {
  rtrim(s);
  return ltrim(s);
}
|
||||
|
||||
#include "atomic_helpers.h"
|
||||
|
||||
#endif // HSA_RUNTIME_CORE_UTIL_UTIIS_H_
|
||||
|
||||
Fai riferimento in un nuovo problema
Block a user