Allows users, via env ROCR_VISIBLE_DEVICES, to surface a subset of Gpu devices
Change-Id: I5662639d5d70f054831969669f9d30dec356dd5a Update per review comments Change-Id: I18c7d7cb00b261493b61c2cf5454d486166f40d8
This commit is contained in:
committed by
Sean Keely
parent
014945310a
commit
3fbf03af76
@@ -2,24 +2,24 @@
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
@@ -29,7 +29,7 @@
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
@@ -45,6 +45,13 @@
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
|
||||
// <iostream> is only needed by debug-only helpers (those guarded by NDEBUG).
#ifndef NDEBUG
#include <iostream>
#endif
|
||||
|
||||
#include "hsakmt.h"
|
||||
|
||||
@@ -59,9 +66,79 @@ namespace amd {
|
||||
static const uint kKfdVersionMajor = 0;
|
||||
static const uint kKfdVersionMinor = 99;
|
||||
|
||||
#ifndef NDEBUG
|
||||
/**
 * Debug helper that writes one "KfdIdx: <key> @ UsrIdx: <value>" line to
 * std::cout for every entry of the map, in ascending key order.
 *
 * @param gpu_usr_map map whose keys are printed as KfdIdx and whose
 * values are printed as UsrIdx.
 *
 * @return always true.
 */
static bool PrintUsrGpuMap(std::map<uint32_t, int32_t>& gpu_usr_map) {
  // Referencing the function from its own body suppresses the
  // unused-symbol warning when nothing else calls it.
  (void)PrintUsrGpuMap;

  for (const auto& entry : gpu_usr_map) {
    const uint32_t kfd_index = entry.first;
    const int32_t usr_index = entry.second;
    std::cout << "KfdIdx: " << kfd_index << " @ UsrIdx: " << usr_index << std::endl;
  }
  return true;
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Determines if user has defined the env that indicates which
|
||||
* subset of Gpu's are desired to be surfaced. If defined the
|
||||
* set of Gpu's are captured into a map of Gpu index and
|
||||
*
|
||||
* @return true if env is not blank, false otherwise. It is
|
||||
* possible to have zero devices surfaced even when env is
|
||||
* not blank.
|
||||
*/
|
||||
static bool MapUsrGpuList(int32_t numNodes, std::map<uint32_t, int32_t>& gpu_usr_map) {
|
||||
const std::string& env_value = core::Runtime::runtime_singleton_->flag().visible_gpus();
|
||||
if (env_value.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Capture the env value string as a parsable stream
|
||||
std::stringstream stream(env_value);
|
||||
|
||||
// Read stream until there are no more tokens
|
||||
int32_t usrIdx = 0;
|
||||
int32_t token = 0x11231926;
|
||||
while (!stream.eof()) {
|
||||
// Read the option value
|
||||
stream >> token;
|
||||
if (stream.fail()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Stop processing input tokens if invalid index is seen
|
||||
// A value that is less than zero or greater than the
|
||||
// number of Numa nodes is considered invalid
|
||||
if ((token < 0) || (token >= numNodes)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Determine if current value has been seen before
|
||||
// @note: Currently we are interpreting a repeat as
|
||||
// an invalid index i.e. is equal to -1
|
||||
bool exists = gpu_usr_map.find(token) != gpu_usr_map.end();
|
||||
if (exists) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Update Gpu User map table
|
||||
gpu_usr_map[token] = usrIdx++;
|
||||
|
||||
// Ignore the delimiter
|
||||
if (stream.peek() == ',') {
|
||||
stream.ignore();
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
CpuAgent* DiscoverCpu(HSAuint32 node_id, HsaNodeProperties& node_prop) {
|
||||
if (node_prop.NumCPUCores == 0) {
|
||||
return NULL;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
CpuAgent* cpu = new CpuAgent(node_id, node_prop);
|
||||
@@ -72,7 +149,7 @@ CpuAgent* DiscoverCpu(HSAuint32 node_id, HsaNodeProperties& node_prop) {
|
||||
|
||||
GpuAgent* DiscoverGpu(HSAuint32 node_id, HsaNodeProperties& node_prop) {
|
||||
if (node_prop.NumFComputeCores == 0) {
|
||||
return NULL;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
GpuAgent* gpu = new GpuAgent(node_id, node_prop);
|
||||
@@ -152,6 +229,30 @@ void RegisterLinkInfo(uint32_t node_id, uint32_t num_link) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Process the list of Gpus that are surfaced to user
|
||||
*/
|
||||
static void SurfaceGpuList(std::vector<int32_t>& gpu_list) {
|
||||
// Process user visible Gpu devices
|
||||
int32_t invalidIdx = -1;
|
||||
int32_t list_sz = gpu_list.size();
|
||||
HsaNodeProperties node_prop = {0};
|
||||
for (int32_t idx = 0; idx < list_sz; idx++) {
|
||||
if (gpu_list[idx] == invalidIdx) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Obtain properties of the node
|
||||
HSAKMT_STATUS err_val = hsaKmtGetNodeProperties(gpu_list[idx], &node_prop);
|
||||
assert(err_val == HSAKMT_STATUS_SUCCESS && "Error in getting Node Properties");
|
||||
|
||||
// Instantiate a Gpu device. The IO links
|
||||
// of this node have already been registered
|
||||
const GpuAgent* gpu = DiscoverGpu(gpu_list[idx], node_prop);
|
||||
assert((node_prop.NumFComputeCores != 0) && (gpu != nullptr) && "GPU device failed discovery.");
|
||||
}
|
||||
}
|
||||
|
||||
/// @brief Calls Kfd thunk to get the snapshot of the topology of the system,
|
||||
/// which includes associations between, node, devices, memory and caches.
|
||||
void BuildTopology() {
|
||||
@@ -180,20 +281,54 @@ void BuildTopology() {
|
||||
|
||||
core::Runtime::runtime_singleton_->SetLinkCount(props.NumNodes);
|
||||
|
||||
// Determine and process user's request to surface
|
||||
// a subset of Gpu devices
|
||||
int32_t invalidIdx = -1;
|
||||
std::vector<int32_t> gpu_usr_list;
|
||||
std::map<uint32_t, int32_t> gpu_usr_map;
|
||||
bool filter = MapUsrGpuList(props.NumNodes, gpu_usr_map);
|
||||
int32_t list_sz = gpu_usr_map.size();
|
||||
if (filter) {
|
||||
for (int32_t idx = 0; idx < list_sz; idx++) {
|
||||
gpu_usr_list.push_back(invalidIdx);
|
||||
}
|
||||
}
|
||||
|
||||
// Discover agents on every node in the platform.
|
||||
int32_t kfdIdx = 0;
|
||||
for (HSAuint32 node_id = 0; node_id < props.NumNodes; node_id++) {
|
||||
HsaNodeProperties node_prop = {0};
|
||||
if (hsaKmtGetNodeProperties(node_id, &node_prop) != HSAKMT_STATUS_SUCCESS) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Instantiate a Cpu/Apu device
|
||||
const CpuAgent* cpu = DiscoverCpu(node_id, node_prop);
|
||||
const GpuAgent* gpu = DiscoverGpu(node_id, node_prop);
|
||||
assert(((node_prop.NumCPUCores == 0) || (cpu != nullptr)) && "CPU device failed discovery.");
|
||||
|
||||
assert(!(cpu == NULL && gpu == NULL));
|
||||
// Current node is either a dGpu or Apu and might belong
|
||||
// to user visible list. Process node if present in usr
|
||||
// visible list, continue if not found
|
||||
if (node_prop.NumFComputeCores != 0) {
|
||||
if (filter) {
|
||||
const auto& it = gpu_usr_map.find(kfdIdx);
|
||||
if (it != gpu_usr_map.end()) {
|
||||
gpu_usr_list[it->second] = node_id;
|
||||
}
|
||||
} else {
|
||||
gpu_usr_list.push_back(node_id);
|
||||
}
|
||||
kfdIdx++;
|
||||
}
|
||||
|
||||
// Register IO links of node without regard to
|
||||
// it being visible to user or not. It is not
|
||||
// possible to access links of nodes that are
|
||||
// not visible
|
||||
RegisterLinkInfo(node_id, node_prop.NumIOLinks);
|
||||
}
|
||||
|
||||
SurfaceGpuList(gpu_usr_list);
|
||||
}
|
||||
|
||||
bool Load() {
|
||||
|
||||
@@ -2,24 +2,24 @@
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
@@ -29,7 +29,7 @@
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
@@ -71,6 +71,8 @@ class Flag {
|
||||
|
||||
enable_sdma_ = os::GetEnvVar("HSA_ENABLE_SDMA");
|
||||
|
||||
visible_gpus_ = os::GetEnvVar("ROCR_VISIBLE_DEVICES");
|
||||
|
||||
var = os::GetEnvVar("HSA_RUNNING_UNDER_VALGRIND");
|
||||
running_valgrind_ = (var == "1") ? true : false;
|
||||
|
||||
@@ -125,6 +127,8 @@ class Flag {
|
||||
|
||||
// Returns a copy of enable_sdma_, which the constructor fills from the
// HSA_ENABLE_SDMA environment variable.
std::string enable_sdma() const { return enable_sdma_; }
|
||||
|
||||
// Returns a copy of visible_gpus_, which the constructor fills from the
// ROCR_VISIBLE_DEVICES environment variable. The string is returned as
// read; no parsing or validation happens here.
std::string visible_gpus() const { return visible_gpus_; }
|
||||
|
||||
uint32_t max_queues() const { return max_queues_; }
|
||||
|
||||
size_t scratch_mem_size() const { return scratch_mem_size_; }
|
||||
@@ -145,6 +149,8 @@ class Flag {
|
||||
|
||||
std::string enable_sdma_;
|
||||
|
||||
std::string visible_gpus_;
|
||||
|
||||
uint32_t max_queues_;
|
||||
|
||||
size_t scratch_mem_size_;
|
||||
|
||||
Reference in New Issue
Block a user