Extend Rocr Visible Devices functionality to include UUIDs

Change-Id: Ia2892e4033717556a422fe33dec0294fe2ca9e28


[ROCm/ROCR-Runtime commit: 89f7ef224c]
This commit is contained in:
Ramesh Errabolu
2020-03-05 13:44:22 -06:00
parent e8f4f2d9e2
commit ccd4e85fc9
6 ha cambiato i file con 521 aggiunte e 85 eliminazioni
@@ -137,6 +137,7 @@ set ( SRCS "core/util/lnx/os_linux.cpp"
"core/runtime/amd_loader_context.cpp"
"core/runtime/hsa_ven_amd_loader.cpp"
"core/runtime/amd_memory_region.cpp"
"core/runtime/amd_filter_device.cpp"
"core/runtime/amd_topology.cpp"
"core/runtime/default_signal.cpp"
"core/runtime/host_queue.cpp"
@@ -0,0 +1,213 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef HSA_RUNTIME_CORE_INC_AMD_FILTER_DEVICE_H_
#define HSA_RUNTIME_CORE_INC_AMD_FILTER_DEVICE_H_
#include <algorithm>
#include <cstring>
#include <vector>
#include <map>
#include <string>
#include <sstream>
#include "hsakmt.h"
namespace amd {
// ROCr allows users to filter and reorder various Gpu devices that are
// present on ROCm system. This ability is made available via environment
// variable ROCR_VISIBLE_DEVICES (RVD). Users are allowed to specify a list
// of Gpu Identifiers separated by comma delimiter as the value of this env
// variable.
//
// On a ROCm platform instance, a Gpu device could be identified by its:
//
// Index - Position at which ROCr reports it upon device enumeration
// UUID - A string that is unique and is immutable i.e. tags Gpu
// instance across systems and power cycles. UUID values
// are defined to begin with "GPU-" prefix
//
// @note: Not all Gpu devices will report valid UUID's. For example,
// Only devices from Gfx9 and later will encode valid UUID's. To account
// for this and other reasons, the UUID string "GPU-XX" is defined as
// indicating those devices. Users can still select those Gpu devices
// by using their enumeration index
//
// Users are allowed to select a device by specifying its UUID string in
// full or part. A UUID string that does not uniquely match an agent's
// valid UUID prefix is interpreted as terminating. The UUID string
// "GPU-XX" will not match and therefore will terminate
//
// RVD interpreter treats an empty token list as filtering all devices.
// Users can use this mode to report ZERO Gpu devices
//
// RVD interpreter treats a token as Illegal if it cannot be evaluated into
// an instance of Device UUID or Enumeration Index
//
// RVD interpreter treats a Legal instance of Enumeration Index as Terminating
// if any ONE of the following conditions apply:
// Value of index lies outside the interval [0 - (numGpuDevices - 1)]
// Value of index maps to a device that has been previously selected
//
// RVD interpreter treats a Legal instance of Device UUID as Terminating
// if any ONE of the following conditions apply:
// Value of UUID is the literal "GPU-XX"
// Value of UUID matches ZERO devices on system
// Value of UUID matches TWO or more devices on system
// Value of UUID maps to a device that has been previously selected
//
// RVD interpreter builds the list of Gpu devices to surface using tokens
// that are Legal and NOT Terminating
//
// Following are some examples of RVD value strings and their interpretation
// on a ROCm system with four Gpu devices. Assume for now the UUID's of the
// four Gpu devices are:
// Gpu-0: "GPU-BABABABABABABABA"
// Gpu-1: "GPU-ABBAABBAABBAABBA"
// Gpu-2: "GPU-BABAABBAABBABABA"
// Gpu-3: "GPU-ABBABABABABAABBA"
//
// Surface ZERO devices
// A1) ROCR_VISIBLE_DEVICES=""
// A2) ROCR_VISIBLE_DEVICES="-1"
// A3) ROCR_VISIBLE_DEVICES="GPU-XX"
//
// Surface Gpu-3 and Gpu-0 devices in that order
// B) ROCR_VISIBLE_DEVICES="3,GPU-BABABABABABABABA,4"
//
// Surface Gpu-1 and Gpu-2 devices in that order
// C) ROCR_VISIBLE_DEVICES="1,GPU-BABAABBAABBABABA,GPU-XX"
//
// Surface Gpu-3 and Gpu-2 devices in that order
// D) ROCR_VISIBLE_DEVICES="3,GPU-BABAABBA,GPU-XX"
//
/// @brief Interpreter for the ROCR_VISIBLE_DEVICES (RVD) setting. Holds the
/// tokens parsed from the user's value, the UUID strings of the Gpu devices
/// found on the system, and the resulting map from a Gpu's enumeration index
/// to the rank at which it is surfaced
class RvdFilter {
public:
/// @brief Constructor
RvdFilter() {}
/// @brief Destructor.
~RvdFilter() {}
/// @brief Determine if user has specified environment variable
/// ROCR_VISIBLE_DEVICES (RVD) to filter and reorder Gpu devices
///
/// @return Value of the runtime's filter_visible_gpus flag, i.e.
/// TRUE if user has defined the env RVD
static bool FilterDevices();
/// @brief Determine if user has specified environment variable
/// ROCR_VISIBLE_DEVICES (RVD) to filter out all Gpu devices i.e.
/// surface ZERO devices
///
/// @return TRUE if the RVD value string is empty
bool SelectZeroDevices();
/// @brief Builds the list of tokens specified by user to filter
/// and reorder Gpu devices. A token represents either a Gpu's
/// enumeration index or its UUID value. Tokens are comma separated,
/// converted to upper case and stripped of surrounding whitespace.
/// It is possible for the list to have no tokens i.e. user has
/// selected zero devices
void BuildRvdTokenList();
/// @brief Build the list of Gpu device UUIDs as enumerated by ROCt.
/// Cpu-only nodes are skipped. A Gpu that reports a UUID of zero is
/// recorded with a placeholder string that no user token can match
///
/// @param numNodes Number of ROCm devices present on system, includes
/// both Cpu and Gpu's devices
void BuildDeviceUuidList(uint32_t numNodes);
/// @brief Build the list of Gpu devices that will be enumerated to user.
/// Tokens are evaluated in order and evaluation stops at the first token
/// that is illegal or terminating
///
/// @return Number of Gpu devices to surface upon devices enumeration
uint32_t BuildUsrDeviceList();
/// @brief Processes UUID token and returns its enumeration index
///
/// @param token RVD token encoding a device's UUID value, in full
/// or as a leading part of it
/// @return Enumeration index of the one device whose UUID begins with
/// the token; -1 if the token matches no device, matches more than one
/// device, or has a length outside the range [5 - 20]
int32_t ProcessUuidToken(const std::string& token);
/// @brief Get the number of Gpu devices that will be surfaced
/// upon device enumeration
///
/// @return Number of devices to enumerate including possibly
/// ZERO devices
uint32_t GetUsrDeviceListSize();
/// @brief Return the rank at which the queried Gpu device will be
/// surfaced upon device enumeration
///
/// @param roctIdx Enumeration index of the Gpu device being queried
/// @return -1 if queried device is not surfaced, else a value in
/// the range [0 - (numGpus - 1)]
int32_t GetUsrDeviceRank(uint32_t roctIdx);
#ifndef NDEBUG
/// @brief Set debug UUID values to Gpu devices. This is intended to
/// help debug and test RVD module functionality
void SetDeviceUuidList();
/// @brief Print the list of Uuids of Gpu devices present on system
void PrintDeviceUuidList();
/// @brief Print the list of Gpu devices per their enumeration order
void PrintUsrDeviceList();
/// @brief Print the list of tokens specified by user to filter
/// and reorder Gpu devices
void PrintRvdTokenList();
#endif
private:
/// @brief List of tokens specified by user to select and reorder
/// Gpu devices, in the order they appear in the RVD value
std::vector<std::string> rvdTokenList_;
/// @brief Ordered list of ROCt enumerated Gpu device's UUID values
std::vector<std::string> devUuidList_;
/// @brief Map from a Gpu device's enumeration index (key) to the
/// rank at which it is surfaced to the user (value)
std::map<uint32_t, int32_t> usrDeviceList_;
}; // End of class RvdFilter
} // namespace amd
#endif // header guard - HSA_RUNTIME_CORE_INC_AMD_FILTER_DEVICE_H_
@@ -0,0 +1,272 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#include "core/inc/amd_filter_device.h"
#include <algorithm>
#include <cstring>
#include <vector>
#include <map>
#include <string>
#include <sstream>
#include <iomanip>
#include <iostream>
#include <climits>
#include "hsakmt.h"
#include "core/util/utils.h"
#include "core/inc/runtime.h"
#include "core/inc/amd_cpu_agent.h"
#include "core/inc/amd_gpu_agent.h"
#include "core/inc/amd_memory_region.h"
namespace amd {
// Reports whether Gpu filtering has been requested, as recorded by the
// runtime's filter_visible_gpus flag
bool RvdFilter::FilterDevices() {
  const bool filterRequested = core::Runtime::runtime_singleton_->flag().filter_visible_gpus();
  return filterRequested;
}
// Reports whether the RVD value string is empty, which is interpreted
// as a request to surface ZERO Gpu devices
bool RvdFilter::SelectZeroDevices() {
  return core::Runtime::runtime_singleton_->flag().visible_gpus().empty();
}
// Splits the RVD value string on commas and appends each piece, converted
// to upper case and stripped of surrounding whitespace, to rvdTokenList_.
// An empty RVD value leaves the token list untouched
void RvdFilter::BuildRvdTokenList() {
  // Determine if user has chosen ZERO devices to be surfaced
  const std::string& envVal = core::Runtime::runtime_singleton_->flag().visible_gpus();
  if (envVal.empty()) {
    return;
  }

  // Parse env value into tokens separated by comma (',') delimiter
  std::string token;
  std::stringstream stream(envVal);
  while (std::getline(stream, token, ',')) {
    // toupper() is only defined for values representable as unsigned char,
    // so convert each byte before the call to keep non-ASCII input safe
    std::transform(token.begin(), token.end(), token.begin(),
                   [](unsigned char ch) { return static_cast<char>(::toupper(ch)); });
    rvdTokenList_.push_back(trim(token));
  }
}
void RvdFilter::BuildDeviceUuidList(uint32_t numNodes) {
HSAKMT_STATUS status;
HsaNodeProperties props = {0};
for (HSAuint32 idx = 0; idx < numNodes; idx++) {
// Query for node properties and ignore Cpu devices
status = hsaKmtGetNodeProperties(idx, &props);
if (status != HSAKMT_STATUS_SUCCESS) {
continue;
}
if (props.NumFComputeCores == 0) {
continue;
}
// For devices whose UUID is zero build a string that
// will not match user provided value
if (props.UniqueID == 0) {
devUuidList_.push_back("Invalid-UUID");
continue;
}
// For devices that support valid UUID values capture UUID
// value into a upper case hex string of length 16 including
// leading zeros if necessary
std::stringstream stream;
stream << "GPU-" << std::setfill('0') << std::setw(sizeof(uint64_t) * 2) << std::hex
<< props.UniqueID;
std::string uuidVal(stream.str());
std::transform(uuidVal.begin(), uuidVal.end(), uuidVal.begin(), ::toupper);
devUuidList_.push_back(uuidVal);
}
}
// Resolves a UUID token to the enumeration index of the single device
// whose UUID string begins with that token.
//
// @param token Upper case UUID value, in full or as a leading part
// @return Index of the matching device; -1 when the token length is outside
// [5 - 20], when no device matches, or when more than one device matches
int32_t RvdFilter::ProcessUuidToken(const std::string& token) {
  // Reject tokens too short or too long to be a UUID string
  const std::string::size_type tokenLen = token.length();
  if ((tokenLen < 5) || (tokenLen > 20)) {
    return -1;
  }

  int32_t matchIdx = -1;
  int32_t curIdx = 0;
  for (const std::string& uuid : devUuidList_) {
    const int32_t thisIdx = curIdx++;

    // A token longer than this device's UUID cannot be its prefix
    if (tokenLen > uuid.length()) {
      continue;
    }

    // Skip devices whose UUID does not begin with the token
    if (uuid.compare(0, tokenLen, token) != 0) {
      continue;
    }

    // A second match makes the token ambiguous
    if (matchIdx != -1) {
      return -1;
    }
    matchIdx = thisIdx;
  }

  // Either the index of the only match or -1 if nothing matched
  return matchIdx;
}
// Evaluates the user's RVD tokens, in order, into the map usrDeviceList_
// which associates a Gpu's enumeration index with the rank at which it is
// surfaced. Evaluation stops at the first token that is illegal (empty, or
// neither a UUID nor an integer) or terminating (unknown or ambiguous UUID,
// index out of range, device already selected).
//
// @return Number of Gpu devices selected so far, i.e. size of usrDeviceList_
uint32_t RvdFilter::BuildUsrDeviceList() {
  // Get number of Gpu devices and user specified tokens. There is no point
  // looking at more tokens than devices since a repeat terminates
  const uint32_t numGpus = devUuidList_.size();
  const uint32_t loopCnt = std::min(numGpus, uint32_t(rvdTokenList_.size()));

  int32_t usrIdx = 0;
  for (uint32_t idx = 0; idx < loopCnt; idx++) {
    const std::string& token = rvdTokenList_[idx];

    // An empty token (e.g. from "1,,2" or ",") is illegal. It must be
    // rejected before the first character is inspected
    if (token.empty()) {
      break;
    }

    // Hold the index in a wide type so an out of range number cannot
    // wrap into a valid index when narrowed
    long long devIdx = -1;
    if (token[0] == 'G') {
      // Token encodes a UUID value
      devIdx = ProcessUuidToken(token);
    } else {
      // Token encodes device index, it must be consumed in full
      const char* text = token.c_str();
      char* end = nullptr;
      devIdx = std::strtol(text, &end, 0);
      if ((end == text) || (*end != '\0')) {
        break;
      }
    }

    // Rvd Token evaluates to wrong device index
    if ((devIdx < 0) || (devIdx >= static_cast<long long>(numGpus))) {
      break;
    }

    // A device index that was previously seen is interpreted as a
    // terminator, otherwise record the rank at which it is surfaced
    const uint32_t devKey = static_cast<uint32_t>(devIdx);
    if (!usrDeviceList_.emplace(devKey, usrIdx).second) {
      break;
    }
    ++usrIdx;
  }

  return usrDeviceList_.size();
}
// Number of Gpu devices selected for surfacing, possibly ZERO
uint32_t RvdFilter::GetUsrDeviceListSize() {
  return static_cast<uint32_t>(usrDeviceList_.size());
}
// Looks up the rank at which a Gpu device is surfaced.
//
// @param roctIdx Enumeration index of the Gpu device being queried
// @return Rank recorded for the device, -1 if the device is not surfaced
int32_t RvdFilter::GetUsrDeviceRank(uint32_t roctIdx) {
  const auto entry = usrDeviceList_.find(roctIdx);
  return (entry == usrDeviceList_.end()) ? -1 : entry->second;
}
#ifndef NDEBUG
// Debug aid: overwrites the UUID strings of the leading Gpu devices with
// fixed, well known values so RVD handling can be exercised on any system.
// At most as many devices are overwritten as there are debug values
void RvdFilter::SetDeviceUuidList() {
  static const uint64_t dbgUuid[] = {0xBABABABABABABABA, 0xBABABABABABAABBA, 0xBABABABAABBAABBA,
                                     0xBABAABBAABBAABBA, 0xABBAABBAABBAABBA, 0xABBAABBAABBABABA,
                                     0xABBAABBABABABABA, 0xABBABABABABABABA};

  // Override Uuid values for as many devices as there are debug values
  const uint32_t numGpus = devUuidList_.size();
  const uint32_t numUuids = (sizeof(dbgUuid) / sizeof(dbgUuid[0]));
  const uint32_t numToSet = std::min(numGpus, numUuids);
  for (uint32_t idx = 0; idx < numToSet; idx++) {
    // A debug value of zero stands for a device without a valid UUID.
    // Store the same non-matching placeholder that is used when the
    // device list is built, rather than silently keeping the old entry
    if (dbgUuid[idx] == 0) {
      devUuidList_[idx] = "Invalid-UUID";
      continue;
    }

    // For devices that support valid UUID values build "GPU-" followed
    // by sixteen upper case hex digits including leading zeros
    std::stringstream stream;
    stream << "GPU-" << std::setfill('0') << std::setw(sizeof(uint64_t) * 2) << std::hex
           << dbgUuid[idx];
    std::string uuidVal(stream.str());
    std::transform(uuidVal.begin(), uuidVal.end(), uuidVal.begin(), ::toupper);
    devUuidList_[idx] = uuidVal;
  }
}
// Debug aid: prints one line per Gpu device giving its position in
// devUuidList_ and its UUID string
void RvdFilter::PrintDeviceUuidList() {
  uint32_t devPos = 0;
  for (const std::string& uuid : devUuidList_) {
    std::cout << "Dev[" << devPos << "]: " << uuid << std::endl << std::flush;
    ++devPos;
  }
}
// Debug aid: prints one line per selected Gpu device. The map value (rank
// at which the device is surfaced) is shown inside the brackets and the
// map key (device index) after them. Lines follow the map's key order
void RvdFilter::PrintUsrDeviceList() {
  for (auto it = usrDeviceList_.cbegin(); it != usrDeviceList_.cend(); ++it) {
    const uint32_t devIndex = it->first;
    const int32_t devRank = it->second;
    std::cout << "UsrDev[" << devRank << "]: " << devIndex << std::endl << std::flush;
  }
}
// Debug aid: prints one line per user token giving its position in
// rvdTokenList_ and its text
void RvdFilter::PrintRvdTokenList() {
  uint32_t tokenPos = 0;
  for (const std::string& tokenText : rvdTokenList_) {
    std::cout << "Token[" << tokenPos << "]: " << tokenText << std::endl << std::flush;
    ++tokenPos;
  }
}
#endif
} // namespace amd
@@ -41,6 +41,7 @@
////////////////////////////////////////////////////////////////////////////////
#include "core/inc/amd_topology.h"
#include "core/inc/amd_filter_device.h"
#include <algorithm>
#include <cstring>
@@ -66,81 +67,6 @@ namespace amd {
static const uint kKfdVersionMajor = 0;
static const uint kKfdVersionMinor = 99;
#ifndef NDEBUG
// Debug aid: prints one line per map entry showing the Kfd index (key)
// and the user index (value) it maps to. Always returns true
static bool PrintUsrGpuMap(std::map<uint32_t, int32_t>& gpu_usr_map) {
  (void)PrintUsrGpuMap;  // Suppress unused symbol warning.
  for (const auto& entry : gpu_usr_map) {
    const uint32_t kfdIdx = entry.first;
    const int32_t usrIdx = entry.second;
    std::cout << "KfdIdx: " << kfdIdx << " @ UsrIdx: " << usrIdx << std::endl;
  }
  return true;
}
#endif
/**
 * Determines if user has defined the env that indicates which
 * subset of Gpu's are desired to be surfaced. If defined, the
 * requested indices are captured into a map whose key is the Gpu
 * index given by the user and whose value is the position of that
 * index in the user's list.
 *
 * The env value is read as integers separated by ','. Parsing stops,
 * keeping what has been captured so far, at the first value that
 * cannot be read as an integer, is out of range, repeats an earlier
 * value, or is not followed by a ','.
 *
 * @param numNodes Exclusive upper bound used to validate each index
 *
 * @param gpu_usr_map Output map from Gpu index to its position in
 * the user's list
 *
 * @return true if the runtime's filter_visible_gpus flag is set i.e.
 * env is defined and has some value including empty string, false
 * otherwise. It is possible to have zero devices surfaced even when
 * env is not blank.
 */
static bool MapUsrGpuList(int32_t numNodes, std::map<uint32_t, int32_t>& gpu_usr_map) {
// Nothing to do if filtering has not been requested
bool filter = core::Runtime::runtime_singleton_->flag().filter_visible_gpus();
if (filter == false) {
return false;
}
// An empty value requests filtering but selects no devices
const std::string& env_value = core::Runtime::runtime_singleton_->flag().visible_gpus();
if (env_value.empty()) {
return true;
}
// Capture the env value string as a parsable stream
std::stringstream stream(env_value);
// Read stream until there are no more tokens
int32_t usrIdx = 0;
// Initial value is a placeholder, it is replaced by each successful read
int32_t token = 0x11231926;
while (!stream.eof()) {
// Read the option value, stop if it is not an integer
stream >> token;
if (stream.fail()) {
return true;
}
// Stop processing input tokens if invalid index is seen
// A value that is less than zero or greater than or equal
// to numNodes is considered invalid
if ((token < 0) || (token >= numNodes)) {
return true;
}
// Determine if current value has been seen before
// @note: Currently we are interpreting a repeat as
// an invalid index i.e. is equal to -1
bool exists = gpu_usr_map.find(token) != gpu_usr_map.end();
if (exists) {
return true;
}
// Update Gpu User map table
gpu_usr_map[token] = usrIdx++;
// Ignore the delimiter, anything other than ',' ends parsing
if (stream.peek() == ',') {
stream.ignore();
} else {
return true;
}
}
return true;
}
CpuAgent* DiscoverCpu(HSAuint32 node_id, HsaNodeProperties& node_prop) {
if (node_prop.NumCPUCores == 0) {
return nullptr;
@@ -286,15 +212,18 @@ void BuildTopology() {
core::Runtime::runtime_singleton_->SetLinkCount(props.NumNodes);
// Determine and process user's request to surface
// a subset of Gpu devices
// Query if env ROCR_VISIBLE_DEVICES is defined. If defined
// determine number and order of GPU devices to be surfaced
RvdFilter rvdFilter;
int32_t invalidIdx = -1;
uint32_t visibleCnt = 0;
std::vector<int32_t> gpu_usr_list;
std::map<uint32_t, int32_t> gpu_usr_map;
bool filter = MapUsrGpuList(props.NumNodes, gpu_usr_map);
int32_t list_sz = gpu_usr_map.size();
bool filter = RvdFilter::FilterDevices();
if (filter) {
for (int32_t idx = 0; idx < list_sz; idx++) {
rvdFilter.BuildRvdTokenList();
rvdFilter.BuildDeviceUuidList(props.NumNodes);
visibleCnt = rvdFilter.BuildUsrDeviceList();
for (int32_t idx = 0; idx < visibleCnt; idx++) {
gpu_usr_list.push_back(invalidIdx);
}
}
@@ -307,7 +236,7 @@ void BuildTopology() {
continue;
}
// Instantiate a Cpu/Apu device
// Instantiate a Cpu device
const CpuAgent* cpu = DiscoverCpu(node_id, node_prop);
assert(((node_prop.NumCPUCores == 0) || (cpu != nullptr)) && "CPU device failed discovery.");
@@ -316,9 +245,9 @@ void BuildTopology() {
// visible list, continue if not found
if (node_prop.NumFComputeCores != 0) {
if (filter) {
const auto& it = gpu_usr_map.find(kfdIdx);
if (it != gpu_usr_map.end()) {
gpu_usr_list[it->second] = node_id;
int32_t devRank = rvdFilter.GetUsrDeviceRank(kfdIdx);
if (devRank != (-1)) {
gpu_usr_list[devRank] = node_id;
}
} else {
gpu_usr_list.push_back(node_id);
@@ -333,6 +262,7 @@ void BuildTopology() {
RegisterLinkInfo(node_id, node_prop.NumIOLinks);
}
// Instantiate ROCr objects to encapsulate Gpu devices
SurfaceGpuList(gpu_usr_list);
}
@@ -144,6 +144,7 @@ class Flag {
std::string enable_sdma() const { return enable_sdma_; }
std::string visible_gpus() const { return visible_gpus_; }
bool filter_visible_gpus() const { return filter_visible_gpus_; }
uint32_t max_queues() const { return max_queues_; }
+19
Vedi File
@@ -49,6 +49,9 @@
#include "stddef.h"
#include "stdlib.h"
#include <assert.h>
#include <iostream>
#include <string>
#include <algorithm>
typedef unsigned int uint;
typedef uint64_t uint64;
@@ -307,6 +310,22 @@ static __forceinline uint64_t NextPow2(uint64_t value) {
// Returns true when the C string's first character is the terminator.
// The pointer itself is dereferenced without a null check
static __forceinline bool strIsEmpty(const char* str) noexcept {
  return *str == '\0';
}
// Removes leading whitespace, as classified by the classic "C" locale,
// from the string in place and returns a reference to that same string
static __forceinline std::string& ltrim(std::string& s) {
  const std::locale& cLocale = std::locale::classic();
  auto firstKept = s.begin();
  while ((firstKept != s.end()) && std::isspace<char>(*firstKept, cLocale)) {
    ++firstKept;
  }
  s.erase(s.begin(), firstKept);
  return s;
}
// Removes trailing whitespace, as classified by the classic "C" locale,
// from the string in place and returns a reference to that same string
static __forceinline std::string& rtrim(std::string& s) {
  const std::locale& cLocale = std::locale::classic();
  auto pastLastKept = s.end();
  while ((pastLastKept != s.begin()) && std::isspace<char>(*(pastLastKept - 1), cLocale)) {
    --pastLastKept;
  }
  s.erase(pastLastKept, s.end());
  return s;
}
// Removes whitespace from both ends of the string in place, trailing
// end first, and returns a reference to that same string
static __forceinline std::string& trim(std::string& s) {
  rtrim(s);
  return ltrim(s);
}
#include "atomic_helpers.h"
#endif // HSA_RUNTIME_CORE_UTIL_UTIIS_H_