Allows users, via env ROCR_VISIBLE_DEVICES, to surface a subset of Gpu devices

Change-Id: I5662639d5d70f054831969669f9d30dec356dd5a

Update per review comments

Change-Id: I18c7d7cb00b261493b61c2cf5454d486166f40d8
This commit is contained in:
Ramesh Errabolu
2019-01-31 17:27:06 -06:00
committed by Sean Keely
parent 014945310a
commit 3fbf03af76
2 changed files with 161 additions and 20 deletions
+147 -12
View File
@@ -2,24 +2,24 @@
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
//
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
//
//
// Developed by:
//
//
// AMD Research and AMD HSA Software Development
//
//
// Advanced Micro Devices, Inc.
//
//
// www.amd.com
//
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
@@ -29,7 +29,7 @@
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
@@ -45,6 +45,13 @@
#include <algorithm>
#include <cstring>
#include <vector>
#include <map>
#include <string>
#include <sstream>
// <iostream> is only needed by the debug-only helpers that are compiled
// under the same NDEBUG guard, so keep it out of release builds.
#ifndef NDEBUG
#include <iostream>
#endif
#include "hsakmt.h"
@@ -59,9 +66,79 @@ namespace amd {
static const uint kKfdVersionMajor = 0;
static const uint kKfdVersionMinor = 99;
#ifndef NDEBUG
/**
 * Debug helper that dumps the Gpu user map to stdout, one entry per
 * line, in the key order maintained by the map.
 *
 * @param gpu_usr_map Table whose keys are printed as "KfdIdx" and
 * whose values are printed as "UsrIdx".
 *
 * @return true, unconditionally.
 */
static bool PrintUsrGpuMap(std::map<uint32_t, int32_t>& gpu_usr_map) {
  // Self reference keeps the compiler from flagging the helper as unused.
  (void)PrintUsrGpuMap;

  for (const auto& entry : gpu_usr_map) {
    const uint32_t kfdIdx = entry.first;
    const int32_t usrIdx = entry.second;
    std::cout << "KfdIdx: " << kfdIdx << " @ UsrIdx: " << usrIdx << std::endl;
  }

  return true;
}
#endif
/**
 * Translates the user's request to surface a subset of Gpu devices
 * into a lookup table.
 *
 * The request is the string reported by the runtime flag
 * visible_gpus(). It is read as a list of integer indices separated
 * by ','. Every accepted index is stored as a key of the table, and
 * its value is the zero based position at which it appeared in the
 * list.
 *
 * Parsing stops, keeping whatever has been accepted so far, at the
 * first token that:
 *   - cannot be read as an integer,
 *   - is negative or is not less than @p numNodes,
 *   - repeats an index that has already been accepted,
 *   - is not immediately followed by a ','.
 *
 * @param numNodes Exclusive upper bound for an acceptable index.
 *
 * @param gpu_usr_map Output table, updated in place with the
 * accepted indices.
 *
 * @return true if the request string is not blank, false otherwise.
 * It is possible to have zero devices surfaced even when the request
 * is not blank.
 */
static bool MapUsrGpuList(int32_t numNodes, std::map<uint32_t, int32_t>& gpu_usr_map) {
  const std::string& request = core::Runtime::runtime_singleton_->flag().visible_gpus();
  if (request.empty()) {
    return false;
  }

  // Wrap the request so that it can be consumed token by token
  std::stringstream parser(request);

  int32_t nextUsrIdx = 0;
  int32_t candidate = 0x11231926;
  while (!parser.eof()) {
    parser >> candidate;

    // The checks are ordered so that the value of candidate is
    // looked at only when the extraction has succeeded
    const bool rejected = parser.fail() || (candidate < 0) || (candidate >= numNodes) ||
        (gpu_usr_map.count(candidate) != 0);
    if (rejected) {
      break;
    }

    // Remember the position at which this index was requested
    gpu_usr_map[candidate] = nextUsrIdx++;

    // Another token is expected only when a delimiter follows
    if (parser.peek() != ',') {
      break;
    }
    parser.ignore();
  }

  return true;
}
CpuAgent* DiscoverCpu(HSAuint32 node_id, HsaNodeProperties& node_prop) {
if (node_prop.NumCPUCores == 0) {
return NULL;
return nullptr;
}
CpuAgent* cpu = new CpuAgent(node_id, node_prop);
@@ -72,7 +149,7 @@ CpuAgent* DiscoverCpu(HSAuint32 node_id, HsaNodeProperties& node_prop) {
GpuAgent* DiscoverGpu(HSAuint32 node_id, HsaNodeProperties& node_prop) {
if (node_prop.NumFComputeCores == 0) {
return NULL;
return nullptr;
}
GpuAgent* gpu = new GpuAgent(node_id, node_prop);
@@ -152,6 +229,30 @@ void RegisterLinkInfo(uint32_t node_id, uint32_t num_link) {
}
}
/**
* Process the list of Gpus that are surfaced to user
*/
static void SurfaceGpuList(std::vector<int32_t>& gpu_list) {
// Process user visible Gpu devices
int32_t invalidIdx = -1;
int32_t list_sz = gpu_list.size();
HsaNodeProperties node_prop = {0};
for (int32_t idx = 0; idx < list_sz; idx++) {
if (gpu_list[idx] == invalidIdx) {
break;
}
// Obtain properties of the node
HSAKMT_STATUS err_val = hsaKmtGetNodeProperties(gpu_list[idx], &node_prop);
assert(err_val == HSAKMT_STATUS_SUCCESS && "Error in getting Node Properties");
// Instantiate a Gpu device. The IO links
// of this node have already been registered
const GpuAgent* gpu = DiscoverGpu(gpu_list[idx], node_prop);
assert((node_prop.NumFComputeCores != 0) && (gpu != nullptr) && "GPU device failed discovery.");
}
}
/// @brief Calls Kfd thunk to get the snapshot of the topology of the system,
/// which includes associations between, node, devices, memory and caches.
void BuildTopology() {
@@ -180,20 +281,54 @@ void BuildTopology() {
core::Runtime::runtime_singleton_->SetLinkCount(props.NumNodes);
// Determine and process user's request to surface
// a subset of Gpu devices
int32_t invalidIdx = -1;
std::vector<int32_t> gpu_usr_list;
std::map<uint32_t, int32_t> gpu_usr_map;
bool filter = MapUsrGpuList(props.NumNodes, gpu_usr_map);
int32_t list_sz = gpu_usr_map.size();
if (filter) {
for (int32_t idx = 0; idx < list_sz; idx++) {
gpu_usr_list.push_back(invalidIdx);
}
}
// Discover agents on every node in the platform.
int32_t kfdIdx = 0;
for (HSAuint32 node_id = 0; node_id < props.NumNodes; node_id++) {
HsaNodeProperties node_prop = {0};
if (hsaKmtGetNodeProperties(node_id, &node_prop) != HSAKMT_STATUS_SUCCESS) {
continue;
}
// Instantiate a Cpu/Apu device
const CpuAgent* cpu = DiscoverCpu(node_id, node_prop);
const GpuAgent* gpu = DiscoverGpu(node_id, node_prop);
assert(((node_prop.NumCPUCores == 0) || (cpu != nullptr)) && "CPU device failed discovery.");
assert(!(cpu == NULL && gpu == NULL));
// Current node is either a dGpu or Apu and might belong
// to user visible list. Process node if present in usr
// visible list, continue if not found
if (node_prop.NumFComputeCores != 0) {
if (filter) {
const auto& it = gpu_usr_map.find(kfdIdx);
if (it != gpu_usr_map.end()) {
gpu_usr_list[it->second] = node_id;
}
} else {
gpu_usr_list.push_back(node_id);
}
kfdIdx++;
}
// Register IO links of node without regard to
// it being visible to user or not. It is not
// possible to access links of nodes that are
// not visible
RegisterLinkInfo(node_id, node_prop.NumIOLinks);
}
SurfaceGpuList(gpu_usr_list);
}
bool Load() {
+14 -8
View File
@@ -2,24 +2,24 @@
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
//
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
//
//
// Developed by:
//
//
// AMD Research and AMD HSA Software Development
//
//
// Advanced Micro Devices, Inc.
//
//
// www.amd.com
//
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
@@ -29,7 +29,7 @@
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
@@ -71,6 +71,8 @@ class Flag {
enable_sdma_ = os::GetEnvVar("HSA_ENABLE_SDMA");
visible_gpus_ = os::GetEnvVar("ROCR_VISIBLE_DEVICES");
var = os::GetEnvVar("HSA_RUNNING_UNDER_VALGRIND");
running_valgrind_ = (var == "1") ? true : false;
@@ -125,6 +127,8 @@ class Flag {
std::string enable_sdma() const { return enable_sdma_; }
std::string visible_gpus() const { return visible_gpus_; }
uint32_t max_queues() const { return max_queues_; }
size_t scratch_mem_size() const { return scratch_mem_size_; }
@@ -145,6 +149,8 @@ class Flag {
std::string enable_sdma_;
std::string visible_gpus_;
uint32_t max_queues_;
size_t scratch_mem_size_;