From 3fbf03af76cc36c74aceb069eeef466e4f6d1f32 Mon Sep 17 00:00:00 2001 From: Ramesh Errabolu Date: Thu, 31 Jan 2019 17:27:06 -0600 Subject: [PATCH] Allows users, via env ROCR_VISIBLE_DEVICES, to surface a subset of Gpu devices Change-Id: I5662639d5d70f054831969669f9d30dec356dd5a Update per review comments Change-Id: I18c7d7cb00b261493b61c2cf5454d486166f40d8 --- .../hsa-runtime/core/runtime/amd_topology.cpp | 159 ++++++++++++++++-- runtime/hsa-runtime/core/util/flag.h | 22 ++- 2 files changed, 161 insertions(+), 20 deletions(-) diff --git a/runtime/hsa-runtime/core/runtime/amd_topology.cpp b/runtime/hsa-runtime/core/runtime/amd_topology.cpp index bf41ebfc57..49a8b3e912 100644 --- a/runtime/hsa-runtime/core/runtime/amd_topology.cpp +++ b/runtime/hsa-runtime/core/runtime/amd_topology.cpp @@ -2,24 +2,24 @@ // // The University of Illinois/NCSA // Open Source License (NCSA) -// +// // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// +// // Developed by: -// +// // AMD Research and AMD HSA Software Development -// +// // Advanced Micro Devices, Inc. -// +// // www.amd.com -// +// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to // deal with the Software without restriction, including without limitation // the rights to use, copy, modify, merge, publish, distribute, sublicense, // and/or sell copies of the Software, and to permit persons to whom the // Software is furnished to do so, subject to the following conditions: -// +// // - Redistributions of source code must retain the above copyright notice, // this list of conditions and the following disclaimers. // - Redistributions in binary form must reproduce the above copyright @@ -29,7 +29,7 @@ // nor the names of its contributors may be used to endorse or promote // products derived from this Software without specific prior written // permission. 
-// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL @@ -45,6 +45,13 @@ #include #include #include +#include +#include +#include + +#ifndef NDEBUG +#include +#endif #include "hsakmt.h" @@ -59,9 +66,79 @@ namespace amd { static const uint kKfdVersionMajor = 0; static const uint kKfdVersionMinor = 99; +#ifndef NDEBUG +static bool PrintUsrGpuMap(std::map& gpu_usr_map) { + (void)PrintUsrGpuMap; //Suppress unused symbol warning. + std::map::iterator it; + for (it = gpu_usr_map.begin(); it != gpu_usr_map.end(); it++) { + int32_t usrIdx = it->second; + uint32_t kfdIdx = it->first; + std::cout << "KfdIdx: " << kfdIdx << " @ UsrIdx: " << usrIdx << std::endl; + } + return true; +} +#endif + +/** + * Determines if user has defined the env that indicates which + * subset of Gpu's are desired to be surfaced. If defined the + * set of Gpu's is captured into a map from Gpu index to its position in the env list. + * + * @return true if env is not blank, false otherwise. It is + * possible to have zero devices surfaced even when env is + * not blank. 
+ */ +static bool MapUsrGpuList(int32_t numNodes, std::map& gpu_usr_map) { + const std::string& env_value = core::Runtime::runtime_singleton_->flag().visible_gpus(); + if (env_value.empty()) { + return false; + } + + // Capture the env value string as a parsable stream + std::stringstream stream(env_value); + + // Read stream until there are no more tokens + int32_t usrIdx = 0; + int32_t token = 0x11231926; + while (!stream.eof()) { + // Read the option value + stream >> token; + if (stream.fail()) { + return true; + } + + // Stop processing input tokens if invalid index is seen + // A value that is less than zero or greater than the + // number of Numa nodes is considered invalid + if ((token < 0) || (token >= numNodes)) { + return true; + } + + // Determine if current value has been seen before + // @note: Currently we are interpreting a repeat as + // an invalid index i.e. is equal to -1 + bool exists = gpu_usr_map.find(token) != gpu_usr_map.end(); + if (exists) { + return true; + } + + // Update Gpu User map table + gpu_usr_map[token] = usrIdx++; + + // Ignore the delimiter + if (stream.peek() == ',') { + stream.ignore(); + } else { + return true; + } + } + + return true; +} + CpuAgent* DiscoverCpu(HSAuint32 node_id, HsaNodeProperties& node_prop) { if (node_prop.NumCPUCores == 0) { - return NULL; + return nullptr; } CpuAgent* cpu = new CpuAgent(node_id, node_prop); @@ -72,7 +149,7 @@ CpuAgent* DiscoverCpu(HSAuint32 node_id, HsaNodeProperties& node_prop) { GpuAgent* DiscoverGpu(HSAuint32 node_id, HsaNodeProperties& node_prop) { if (node_prop.NumFComputeCores == 0) { - return NULL; + return nullptr; } GpuAgent* gpu = new GpuAgent(node_id, node_prop); @@ -152,6 +229,30 @@ void RegisterLinkInfo(uint32_t node_id, uint32_t num_link) { } } +/** + * Process the list of Gpus that are surfaced to user + */ +static void SurfaceGpuList(std::vector& gpu_list) { + // Process user visible Gpu devices + int32_t invalidIdx = -1; + int32_t list_sz = gpu_list.size(); + 
HsaNodeProperties node_prop = {0}; + for (int32_t idx = 0; idx < list_sz; idx++) { + if (gpu_list[idx] == invalidIdx) { + break; + } + + // Obtain properties of the node + HSAKMT_STATUS err_val = hsaKmtGetNodeProperties(gpu_list[idx], &node_prop); + assert(err_val == HSAKMT_STATUS_SUCCESS && "Error in getting Node Properties"); + + // Instantiate a Gpu device. The IO links + // of this node have already been registered + const GpuAgent* gpu = DiscoverGpu(gpu_list[idx], node_prop); + assert((node_prop.NumFComputeCores != 0) && (gpu != nullptr) && "GPU device failed discovery."); + } +} + /// @brief Calls Kfd thunk to get the snapshot of the topology of the system, /// which includes associations between, node, devices, memory and caches. void BuildTopology() { @@ -180,20 +281,54 @@ void BuildTopology() { core::Runtime::runtime_singleton_->SetLinkCount(props.NumNodes); + // Determine and process user's request to surface + // a subset of Gpu devices + int32_t invalidIdx = -1; + std::vector gpu_usr_list; + std::map gpu_usr_map; + bool filter = MapUsrGpuList(props.NumNodes, gpu_usr_map); + int32_t list_sz = gpu_usr_map.size(); + if (filter) { + for (int32_t idx = 0; idx < list_sz; idx++) { + gpu_usr_list.push_back(invalidIdx); + } + } + // Discover agents on every node in the platform. + int32_t kfdIdx = 0; for (HSAuint32 node_id = 0; node_id < props.NumNodes; node_id++) { HsaNodeProperties node_prop = {0}; if (hsaKmtGetNodeProperties(node_id, &node_prop) != HSAKMT_STATUS_SUCCESS) { continue; } + // Instantiate a Cpu/Apu device const CpuAgent* cpu = DiscoverCpu(node_id, node_prop); - const GpuAgent* gpu = DiscoverGpu(node_id, node_prop); + assert(((node_prop.NumCPUCores == 0) || (cpu != nullptr)) && "CPU device failed discovery."); - assert(!(cpu == NULL && gpu == NULL)); + // Current node is either a dGpu or Apu and might belong + // to user visible list. 
Process node if present in usr + // visible list, continue if not found + if (node_prop.NumFComputeCores != 0) { + if (filter) { + const auto& it = gpu_usr_map.find(kfdIdx); + if (it != gpu_usr_map.end()) { + gpu_usr_list[it->second] = node_id; + } + } else { + gpu_usr_list.push_back(node_id); + } + kfdIdx++; + } + // Register IO links of node without regard to + // it being visible to user or not. It is not + // possible to access links of nodes that are + // not visible RegisterLinkInfo(node_id, node_prop.NumIOLinks); } + + SurfaceGpuList(gpu_usr_list); } bool Load() { diff --git a/runtime/hsa-runtime/core/util/flag.h b/runtime/hsa-runtime/core/util/flag.h index d6375617d9..a2203791b1 100644 --- a/runtime/hsa-runtime/core/util/flag.h +++ b/runtime/hsa-runtime/core/util/flag.h @@ -2,24 +2,24 @@ // // The University of Illinois/NCSA // Open Source License (NCSA) -// +// // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// +// // Developed by: -// +// // AMD Research and AMD HSA Software Development -// +// // Advanced Micro Devices, Inc. -// +// // www.amd.com -// +// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to // deal with the Software without restriction, including without limitation // the rights to use, copy, modify, merge, publish, distribute, sublicense, // and/or sell copies of the Software, and to permit persons to whom the // Software is furnished to do so, subject to the following conditions: -// +// // - Redistributions of source code must retain the above copyright notice, // this list of conditions and the following disclaimers. // - Redistributions in binary form must reproduce the above copyright @@ -29,7 +29,7 @@ // nor the names of its contributors may be used to endorse or promote // products derived from this Software without specific prior written // permission. 
-// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL @@ -71,6 +71,8 @@ class Flag { enable_sdma_ = os::GetEnvVar("HSA_ENABLE_SDMA"); + visible_gpus_ = os::GetEnvVar("ROCR_VISIBLE_DEVICES"); + var = os::GetEnvVar("HSA_RUNNING_UNDER_VALGRIND"); running_valgrind_ = (var == "1") ? true : false; @@ -125,6 +127,8 @@ class Flag { std::string enable_sdma() const { return enable_sdma_; } + std::string visible_gpus() const { return visible_gpus_; } + uint32_t max_queues() const { return max_queues_; } size_t scratch_mem_size() const { return scratch_mem_size_; } @@ -145,6 +149,8 @@ class Flag { std::string enable_sdma_; + std::string visible_gpus_; + uint32_t max_queues_; size_t scratch_mem_size_;