Unify environment variable management (#235)

* Add environment variable configuration infrastructure
  - Namespace rocshmem::envvar
  - Track all config env vars in per-category lists
  - Remove duplicates from list of allowed env var types
  - Reject negative inputs for unsigned integer types
  - Accept empty strings for std::string
  - Print error source location using C++20 std::source_location
  - Unit tests
* Port environment variables
  - ROCSHMEM_UNIQUEID_WITH_MPI
  - ROCSHMEM_RO_DISABLE_IPC
  - ROCSHMEM_BOOTSTRAP_TIMEOUT
  - ROCSHMEM_BOOTSTRAP_HOSTID
  - ROCSHMEM_BOOTSTRAP_SOCKET_IFNAME
  - ROCSHMEM_RO_PROGRESS_DELAY
  - ROCSHMEM_BOOTSTRAP_SOCKET_FAMILY
  - ROCSHMEM_MAX_NUM_CONTEXTS
    + Merge the independent per-backend copies into a single variable
      that is used by all three backends (IPC, RO, GDA).
    + Set default to 32 (for GDA); prior default for IPC and RO was 1024.
  - ROCSHMEM_MAX_NUM_HOST_CONTEXTS
  - ROCSHMEM_MAX_WF_BUFFERS
  - ROCSHMEM_SQ_SIZE
  - ROCSHMEM_RO_NET_CPU_QUEUE
    + Renamed from RO_NET_CPU_QUEUE
    + Change env var input type to bool, default to false
    + Invert code logic: setting RO_NET_CPU_QUEUE to anything
      would /disable/ a variable gpu_queue, which defaulted to true.
      Variable is now named config::ro::net_cpu_queue,
      with all prior checks for gpu_queue inverted.
  - ROCSHMEM_USE_IB_HCA
  - ROCSHMEM_HEAP_SIZE
    + Defaults to 1L << 30 i.e. 1 GiB,
      from default heap size in memory/heap_memory.hpp.
  - ROCSHMEM_MAX_NUM_TEAMS
    + Unlike other env vars, this can be referenced from devices.
    + Function currently narrows from size_t to int: uses need to be audited
      for safety and correctness in using size_t directly.
  - ROCSHMEM_GDA_ALTERNATE_QP_PORTS
* New env var ROCSHMEM_DEBUG
  - Debug levels:
    + NONE
    + VERSION
    + WARN
    + INFO
    + TRACE
  - Currently unused - will be added later
  - Mirrors RCCL debug control
* Remove rocshmem::rocshmem_env_config
* Change interface for GetClosestNicToGpu
  to accept const char** instead of char**:
  the pointed-to string does not need to be modified
  - Files were not audited for inclusion of util.hpp only for env vars
---------
Signed-off-by: Omri Mor <Omri.Mor@amd.com>
Этот коммит содержится в:
Omri Mor
2025-10-06 10:05:57 -07:00
коммит произвёл GitHub
родитель 0a4f8a83b9
Коммит a0fcbf8d35
29 изменённых файлов: 1070 добавлений и 277 удалений
+1
Просмотреть файл
@@ -30,6 +30,7 @@ target_sources(
PRIVATE
atomic_return.cpp
backend_bc.cpp
envvar.cpp
context_host.cpp
context_device.cpp
mpi_instance.cpp
+5 -11
Просмотреть файл
@@ -34,6 +34,7 @@
#include <fstream>
#include <cstring>
#include "envvar.hpp"
#include "socket.hpp"
#include "utils.hpp"
#include "util.hpp"
@@ -85,15 +86,8 @@ static uint16_t socketToPort(union SocketAddress* addr) {
/* Allow the user to force the IPv4/IPv6 interface selection */
static int envSocketFamily(void) {
int family = -1; // Family selection is not forced, will use first one found
const std::string& socketFamily = rocshmem_env_.get_bootstrap_socket_family();
if (socketFamily == "") return family;
if (socketFamily == "AF_INET")
family = AF_INET; // IPv4
else if (socketFamily == "AF_INET6")
family = AF_INET6; // IPv6
return family;
// envvar::types::socket_family enum is defined directly from AF_* constants
return static_cast<int>(envvar::bootstrap::socket_family.get_value());
}
static int findInterfaces(const char* prefixList, char* names, union SocketAddress* addrs,
@@ -122,7 +116,7 @@ static int findInterfaces(const char* prefixList, char* names, union SocketAddre
SocketToString((union SocketAddress*)interface->ifa_addr, line));
/* Allow the caller to force the socket family type */
if (sock_family != -1 && family != sock_family) continue;
if (sock_family != AF_UNSPEC && family != sock_family) continue;
/* We also need to skip IPv6 loopback interfaces */
if (family == AF_INET6) {
@@ -334,7 +328,7 @@ int FindInterfaces(char* ifNames, union SocketAddress* ifAddrs, int ifNameMaxSiz
int sock_family = envSocketFamily();
// User specified interface
const std::string& socketIfname = rocshmem_env_.get_bootstrap_socket_ifname();
const std::string& socketIfname = envvar::bootstrap::socket_ifname;
if (inputIfName) {
DPRINTF("using iterface %s", inputIfName);
nIfs = findInterfaces(inputIfName, ifNames, ifAddrs, sock_family, ifNameMaxSize, maxIfs);
+3 -2
Просмотреть файл
@@ -33,6 +33,7 @@
#include <string>
#include <iostream>
#include "envvar.hpp"
#include "utils.hpp"
#include "util.hpp"
@@ -101,8 +102,8 @@ uint64_t computeHostHash(void) {
std::string hostName = getHostName(hashLen, '\0');
strncpy(hostHash, hostName.c_str(), hostName.size());
std::string hostid = rocshmem_env_.get_bootstrap_hostid();
if (hostid != "") {
const std::string& hostid = envvar::bootstrap::hostid;
if (!hostid.empty()) {
strncpy(hostHash, hostid.c_str(), hashLen);
} else if (hostName.size() < hashLen) {
std::ifstream file(HOSTID_FILE, std::ios::binary);
+152
Просмотреть файл
@@ -0,0 +1,152 @@
/******************************************************************************
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
*
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*****************************************************************************/
#include "envvar.hpp"
#include <istream>
#include <list>
#include <mutex>
#include <ostream>
#include <string>
#include <tuple>
#include <unordered_map>
#include <unistd.h>
namespace rocshmem {
namespace envvar {
inline namespace _base {
const var<bool> uniqueid_with_mpi("UNIQUEID_WITH_MPI", "", false);
const var<types::debug_level> debug_level("DEBUG_LEVEL", "", types::debug_level::NONE);
const var<size_t> heap_size("HEAP_SIZE", "", 1L << 30);
const var<size_t> max_num_teams("MAX_NUM_TEAMS", "", 40);
const var<size_t> max_num_host_contexts("MAX_NUM_HOST_CONTEXTS", "", 1);
const var<size_t> max_num_contexts("MAX_NUM_CONTEXTS", "", 32);
const var<size_t> max_wavefront_buffers("MAX_WF_BUFFERS", "", 1024);
const var<std::string> requested_dev("USE_IB_HCA", "");
const var<uint32_t> sq_size("SQ_SIZE", "", 1024);
} // inline namespace _base
namespace bootstrap {
const var<int64_t> timeout("TIMEOUT", "", 5);
const var<std::string> hostid("HOSTID", "");
const var<types::socket_family> socket_family("SOCKET_FAMILY", "", types::socket_family::UNSPEC);
const var<std::string> socket_ifname("SOCKET_IFNAME", "");
} // namespace bootstrap
namespace ro {
const var<bool> disable_ipc("DISABLE_IPC", "", false);
const var<useconds_t> progress_delay("PROGRESS_DELAY", "", 3);
const var<bool> net_cpu_queue("NET_CPU_QUEUE", "", false);
} // namespace ro
namespace gda {
const var<bool> alternate_qp_ports("ALTERNATE_QP_PORTS", "", true);
} // namespace gda
namespace _detail {
std::tuple<var_map_t&, std::mutex&> get_var_map() {
// construct on first use idiom
// allocate variable_map on heap to prevent static initialization order fiasco
static auto variable_map = new var_map_t();
static std::mutex map_mutex;
// use std::tie to return a tuple of references
return std::tie(*variable_map, map_mutex);
}
} // namespace _detail
namespace types {
inline namespace _sf {
std::istream& operator>>(std::istream& is, socket_family& family) {
std::string family_str;
is >> family_str;
if (family_str == "AF_INET" ||
family_str == "INET") {
family = socket_family::INET;
} else if (family_str == "AF_INET6" ||
family_str == "INET6") {
family = socket_family::INET6;
} else if (family_str == "AF_UNSPEC" ||
family_str == "UNSPEC") {
family = socket_family::UNSPEC;
} else {
// all other inputs are invalid
is.setstate(std::ios_base::failbit);
family = socket_family::UNSPEC;
}
return is;
}
std::ostream& operator<<(std::ostream& os, const socket_family& family) {
switch (family) {
case socket_family::UNSPEC:
return os << "AF_UNSPEC";
case socket_family::INET:
return os << "AF_INET";
case socket_family::INET6:
return os << "AF_INET6";
}
}
} // inline namespace _sf
inline namespace _debug {
std::istream& operator>>(std::istream& is, debug_level& level) {
std::string level_str;
is >> level_str;
if (level_str == "NONE") {
level = debug_level::NONE;
} else if (level_str == "VERSION") {
level = debug_level::VERSION;
} else if (level_str == "WARN") {
level = debug_level::WARN;
} else if (level_str == "INFO") {
level = debug_level::INFO;
} else if (level_str == "TRACE") {
level = debug_level::TRACE;
} else {
// all other inputs are invalid
is.setstate(std::ios_base::failbit);
level = debug_level::NONE;
}
return is;
}
std::ostream& operator<<(std::ostream& os, const debug_level& level) {
switch (level) {
case debug_level::NONE:
return os << "NONE";
case debug_level::VERSION:
return os << "VERSION";
case debug_level::WARN:
return os << "WARN";
case debug_level::INFO:
return os << "INFO";
case debug_level::TRACE:
return os << "TRACE";
}
}
} // inline namespace _debug
} // namespace types
} // namespace envvar
} // namespace rocshmem
+450
Просмотреть файл
@@ -0,0 +1,450 @@
/******************************************************************************
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
*
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*****************************************************************************/
#ifndef LIBRARY_SRC_ENVVAR_HPP_
#define LIBRARY_SRC_ENVVAR_HPP_
#include <cstdlib>
#include <functional>
#include <iomanip>
#include <iostream>
#include <istream>
#include <iterator>
#include <list>
#include <mutex>
#include <ostream>
#include <source_location>
#include <sstream>
#include <string>
#include <type_traits>
#include <unordered_map>
#include <variant>
#include <sys/socket.h>
#include <unistd.h>
// forward declarations
namespace rocshmem {
namespace envvar {
namespace _detail {
template <typename T> class var;
} // namespace _detail
namespace category {
enum class tag;
} // namespace category
namespace types {
inline namespace _sf {
enum class socket_family;
} // inline namespace _sf
inline namespace _debug {
enum class debug_level;
} // inline namespace _debug
} // namespace types
template <typename T, category::tag> class var;
template <typename... T>
struct type_sequence {
using variant = std::variant<T...>;
using variant_ref = std::variant<std::reference_wrapper<T>...>;
using variant_cref = std::variant<std::reference_wrapper<const T>...>;
template <typename U> struct contains : std::disjunction<std::is_same<T, U>...> { };
template <typename U> static constexpr bool contains_v = contains<U>::value;
using var_variant = std::variant<_detail::var<T>...>;
using var_variant_ref = std::variant<std::reference_wrapper<_detail::var<T>>...>;
using var_variant_cref = std::variant<std::reference_wrapper<const _detail::var<T>>...>;
};
// primary template: start with an empty type_sequence<> on the left
template <typename... T>
struct unique_type_sequence {
using type = typename unique_type_sequence<type_sequence<>, T...>::type;
};
// convenience alias
template <typename... T>
using unique_type_sequence_t = typename unique_type_sequence<T...>::type;
// base case: the type of a type_sequence is a type_sequence
template <typename... T>
struct unique_type_sequence<type_sequence<T...>> {
using type = type_sequence<T...>;
};
// recursion: type_sequence<T...> is already filtered
// if type_sequence<T...> contains U, discard U and recurse with remaining V...
// else, add U to form type_sequence<T..., U> and recurse with remaining V...
template <typename... T, typename U, typename... V>
struct unique_type_sequence<type_sequence<T...>, U, V...> {
using type = std::conditional_t<type_sequence<T...>::template contains_v<U>,
unique_type_sequence_t<type_sequence<T...>, V...>,
unique_type_sequence_t<type_sequence<T..., U>, V...>>;
};
using var_types = unique_type_sequence_t<bool,
size_t,
int64_t,
uint32_t,
useconds_t,
std::string,
types::socket_family,
types::debug_level>;
} // namespace envvar
} // namespace rocshmem
namespace rocshmem {
namespace envvar {
namespace category {
// env var categories
// when adding a new category, make sure to add prefix<tag::CATEGORY>
enum class tag {
ROCSHMEM,
BOOTSTRAP,
REVERSE_OFFLOAD,
GDA,
};
// env var string prefixes
// prevent instantiation of default template; require specializations for each tag
// if P2041 (see https://wg21.link/P2041) gets merged, can be changed to just = delete instead
template <tag C> inline constexpr std::enable_if_t<!std::is_enum_v<decltype(C)>> prefix;
template <> inline constexpr const char* prefix<tag::ROCSHMEM> = "ROCSHMEM";
template <> inline constexpr const char* prefix<tag::BOOTSTRAP> = "ROCSHMEM_BOOTSTRAP";
template <> inline constexpr const char* prefix<tag::REVERSE_OFFLOAD> = "ROCSHMEM_RO";
template <> inline constexpr const char* prefix<tag::GDA> = "ROCSHMEM_GDA";
} // namespace category
namespace parser {
// base parser template
// calls operator>>(std::istream&, T&)
template <typename T>
struct parse {
std::istream& operator()(std::istream& is, T& value) const {
// accept all bases for integer types
if constexpr (std::is_integral_v<T>) {
is >> std::setbase(0);
}
// check if input is negative: remove whitespace, then check if first char is '-'
if constexpr (std::is_unsigned_v<T>) {
is >> std::ws;
auto first = is.peek();
if (first == '-') {
is.setstate(std::ios_base::failbit);
return is;
}
}
return is >> value;
}
};
// string parser specialization, parse entire line
// operator>>(std::istream&, std::string&) stops on the first whitespace character
template <> inline
std::istream& parse<std::string>::operator()(std::istream& is, std::string& value) const {
std::getline(is, value);
// std::getline sets failbit when no characters are extracted
// setting ROCSHMEM_ENVVAR='' can be valid behavior, so clear failbit when this happens
if (value.empty()) {
is.clear();
}
return is;
}
// bool parser specialization, parse both false/true and 0/1
// to accept true/false, True/False, on/off, On/Off, ON/OFF, 0/1, etc.,
// can create a facet inheriting from std::num_get<char> and overriding do_get(..., bool& v)
// then use the locale with that facet: is.imbue(std::locale(is.getloc(), new bool_get{}))
// note that std::locale is responsible for reference-counting the facets, which is very silly
// can the locale (and/or facets) have static storage duration?
template <> inline
std::istream& parse<bool>::operator()(std::istream& is, bool& value) const {
auto pos = is.tellg();
is >> std::boolalpha >> value;
if (is.fail()) {
is.clear();
is.seekg(pos);
is >> std::noboolalpha >> value;
}
return is;
}
// decimal integer parser
template <typename T, std::enable_if_t<std::is_integral_v<T>, bool> = true>
struct parse_decimal {
std::istream& operator()(std::istream& is, T& value) const {
return is >> std::dec >> value;
}
};
// hexadecimal integer parser
template <typename T, std::enable_if_t<std::is_integral_v<T>, bool> = true>
struct parse_hex {
std::istream& operator()(std::istream& is, T& value) const {
return is >> std::hex >> value;
}
};
} // namespace parser
// namespace for defining custom types, for parsing (mostly enums)
namespace types {
// namespace to contain socket_family stuff
inline namespace _sf {
enum class socket_family : int {
UNSPEC = AF_UNSPEC,
INET = AF_INET,
INET6 = AF_INET6,
};
std::istream& operator>>(std::istream& is, socket_family& family);
std::ostream& operator<<(std::ostream& os, const socket_family& family);
} // inline namespace _sf
inline namespace _debug {
enum class debug_level {
NONE,
VERSION,
WARN,
INFO,
TRACE,
};
std::istream& operator>>(std::istream& is, debug_level& level);
std::ostream& operator<<(std::ostream& os, const debug_level& level);
} // inline namespace _debug
} // namespace types
namespace _detail {
template <typename T>
class var {
static_assert(var_types::contains_v<T>,
"T is not in the list of environment variable types");
public:
using value_type = T;
using reference = value_type&;
using const_reference = const value_type&;
// primary constructor
template <typename Parser>
var(const std::string& _prefix, const std::string& _name, const std::string& _doc,
const_reference _default_value, Parser parse)
: name(_prefix + "_" + _name),
doc(_doc),
default_value(_default_value),
value(_default_value),
value_set(false) {
const char* env_value = std::getenv(name.c_str());
if (env_value) {
std::istringstream iss{std::string(env_value)};
std::invoke(parse, iss, value);
if (iss.fail()) {
std::cerr << std::source_location::current().function_name() << ": invalid argument "
<< name << "='" << env_value << "'" << std::endl;
value = default_value;
} else {
value_set = true;
}
}
}
// can't figure out how to do an out-of-line definition for this
template <typename CharT, typename Traits>
friend
std::basic_ostream<CharT, Traits>& operator<<(std::basic_ostream<CharT, Traits>& os,
const var<value_type>& v) {
return os << v.name << "=" << v.value;
}
// public accessors
const std::string& get_name() const {
return name;
}
const std::string& get_doc() const {
return doc;
}
const_reference get_default() const {
return default_value;
}
const_reference get_value() const {
return value;
}
operator const_reference() const {
return value;
}
bool is_default() const {
return !value_set;
}
private:
const std::string name;
const std::string doc;
const value_type default_value;
value_type value;
bool value_set;
};
// var_list is a list<variant<var<T>...>> for all valid var types
// use std::visit for operations on the list elements
using var_list_t = std::list<var_types::var_variant_cref>;
// var_map is a map<category, var_list>
using var_map_t = std::unordered_map<category::tag, var_list_t>;
// returns a tuple<var_map&, mutex&>, where var_map& and mutex& are statically allocated
// in particular, the map is allocated so as to fix the static initialization order problem
// since these are used inside the constructor for envvar::var<T, C> to register variables
// which are expected to be allocated statically as well
std::tuple<var_map_t&, std::mutex&> get_var_map();
// register the var<T, C> with the global variable map
// map from category C to a list of variables in that category
// returns a const_iterator to the inserted variable
// list is heterogeneous over all valid variable types, using variant<_detail::var<T>&...>
// locks mutex to ensure that there aren't race conditions due to parallel modifications
template <typename T, category::tag C>
auto register_variable(const envvar::var<T, C>& v) {
auto [var_map, map_mutex] = _detail::get_var_map();
std::lock_guard map_lock(map_mutex);
// emplace variable to back of list
// conversion sequence:
// const var<T, C>&
// => const _detail::var<T>&
// => std::reference_wrapper<const _detail::var<T>>
// => std::variant<std::reference_wrapper<const _detail::var<T>>...>
auto& var_list = var_map[C];
var_list.emplace_back(v);
// std::list::cend() returns iterator to past-the-end
// since we just emplaced var to back of list, one before end() will be the back.
return std::prev(var_list.cend());
}
// deregister the variable at the const_iterator pos from the list for category C
// locks mutex to ensure that there aren't race conditions due to parallel modifications
template <category::tag C>
void deregister_variable(_detail::var_list_t::const_iterator pos) {
auto [var_map, map_mutex] = _detail::get_var_map();
std::lock_guard map_lock(map_mutex);
var_map[C].erase(pos);
}
} // namespace _detail
// class var<Type, Category>
// reads the specified environment variable using std::getenv()
// if it set, the variable is parsed (using parser::parse<Type> by default)
// if it is unset or parsing fails, a default value is used instead
template <typename T, category::tag C = category::tag::ROCSHMEM>
class var : public _detail::var<T> {
public:
// type aliases aren't inherited, for some reason?
using value_type = T;
using reference = value_type&;
using const_reference = const value_type&;
static constexpr category::tag category = C;
// primary constructor
// calls _detail::var<T>::var() with the category prefix
// registers *this with var map and saves the iterator, so it can be deregistered later
template <typename Parser>
var(const std::string& name, const std::string& doc,
const_reference default_value, Parser parse)
: _detail::var<T>(category::prefix<C>, name, doc, default_value, parse),
var_map_pos(_detail::register_variable(*this)) { }
// convenience (delegating) constructors
//
// ensure that var(name, doc, default_value) is called instead of var(name, doc, parse)
// remove the overload from consideration when Parser is not invocable
template <typename Parser,
typename = std::enable_if_t<std::is_invocable_v<Parser, std::istream&, reference>>>
var(const std::string& name, const std::string& doc, Parser parse)
: var(name, doc, T{}, parse) { }
var(const std::string& name, const std::string& doc, const_reference default_value)
: var(name, doc, default_value, parser::parse<T>{}) { }
var(const std::string& name, const std::string& doc)
: var(name, doc, T{}, parser::parse<T>{}) { }
// deregister *this from var map using saved iterator pos
~var() {
_detail::deregister_variable<C>(var_map_pos);
}
private:
_detail::var_list_t::const_iterator var_map_pos;
};
inline namespace _base {
extern const var<bool> uniqueid_with_mpi;
extern const var<types::debug_level> debug_level;
extern const var<size_t> heap_size;
extern const var<size_t> max_num_teams;
/**
* @brief Maximum number of contexts for the application
*/
extern const var<size_t> max_num_host_contexts;
/**
* @brief Maximum number of contexts used in library
*/
extern const var<size_t> max_num_contexts;
/**
* @brief Maximum number of wavefront buffer arrays supported in the default
* context.
*
* This value determines the size of the status flag, rocshmem_g return, and
* rocshmem atomic return buffers.
*/
extern const var<size_t> max_wavefront_buffers;
extern const var<std::string> requested_dev;
extern const var<uint32_t> sq_size;
} // inline namespace _base
namespace bootstrap {
template <typename T> using var = var<T, category::tag::BOOTSTRAP>;
extern const var<int64_t> timeout;
extern const var<std::string> hostid;
extern const var<types::socket_family> socket_family;
extern const var<std::string> socket_ifname;
} // namespace bootstrap
namespace ro {
template <typename T> using var = var<T, category::tag::REVERSE_OFFLOAD>;
extern const var<bool> disable_ipc;
extern const var<useconds_t> progress_delay;
extern const var<bool> net_cpu_queue;
} // namespace ro
namespace gda {
template <typename T> using var = var<T, category::tag::GDA>;
extern const var<bool> alternate_qp_ports;
} // namespace gda
} // namespace envvar
} // namespace rocshmem
#endif // LIBRARY_SRC_ENVVAR_HPP_
+24 -35
Просмотреть файл
@@ -29,8 +29,9 @@
#include <cassert>
#include "backend_gda.hpp"
#include "mpi_instance.hpp"
#include "envvar.hpp"
#include "gda_team.hpp"
#include "mpi_instance.hpp"
#include "util.hpp"
#include "topology.hpp"
@@ -129,26 +130,14 @@ GDABackend::~GDABackend() {
}
void GDABackend::read_env() {
if (auto maximum_num_contexts_str = getenv("ROCSHMEM_MAX_NUM_CONTEXTS")) {
std::stringstream sstream(maximum_num_contexts_str);
sstream >> maximum_num_contexts_;
}
char* value{nullptr};
if ((value = getenv("ROCSHMEM_USE_IB_HCA"))) {
requested_dev = strdup(value);
if (!envvar::requested_dev.is_default()) {
requested_dev = envvar::requested_dev.get_value().c_str();
} else {
int gpu_dev = 0;
CHECK_HIP(hipGetDevice(&gpu_dev));
int nic_dev = rocshmem::GetClosestNicToGpu(gpu_dev, &requested_dev);
assert (nic_dev != -1);
}
if ((value = getenv("ROCSHMEM_SQ_SIZE"))) {
sq_size = atoi(value);
}
if ((value = getenv("ROCSHMEM_GDA_ALTERNATE_QP_PORTS"))) {
alternate_qp_ports_enabled = atoi(value);
}
}
void GDABackend::setup_ipc() {
@@ -178,9 +167,9 @@ void GDABackend::setup_ctxs() {
setup_host_ctx();
setup_default_ctx();
CHECK_HIP(hipMalloc(&ctx_array, sizeof(GDAContext) * maximum_num_contexts_));
CHECK_HIP(hipMalloc(&ctx_array, sizeof(GDAContext) * envvar::max_num_contexts));
// 0th context is default context
for (size_t i = 0; i < maximum_num_contexts_; i++) {
for (size_t i = 0; i < envvar::max_num_contexts; i++) {
new (&ctx_array[i]) GDAContext(this, i + 1);
ctx_free_list.get()->push_back(ctx_array + i);
}
@@ -188,7 +177,7 @@ void GDABackend::setup_ctxs() {
void GDABackend::cleanup_ctxs() {
ctx_free_list.~FreeListProxy();
for (size_t i = 0; i < maximum_num_contexts_; i++) {
for (size_t i = 0; i < envvar::max_num_contexts; i++) {
ctx_array[i].~GDAContext();
}
@@ -667,7 +656,7 @@ void GDABackend::exchange_qp_dest_info() {
dest_info[i].gid = gid;
}
for (int i = 0; i < maximum_num_contexts_ + 1; i++) {
for (size_t i = 0; i < envvar::max_num_contexts + 1; i++) {
if (backend_comm != MPI_COMM_NULL) {
mpilib_ftable_.Alltoall(MPI_IN_PLACE, sizeof(dest_info_t), MPI_CHAR, dest_info.data() + i * num_pes, sizeof(dest_info_t), MPI_CHAR, backend_comm);
} else {
@@ -718,7 +707,7 @@ void GDABackend::setup_gpu_qps() {
size_t qp_objs_count;
size_t qp_objs_mem_size;
qp_objs_count = (maximum_num_contexts_ + 1) * num_pes;
qp_objs_count = (envvar::max_num_contexts + 1) * num_pes;
qp_objs_mem_size = sizeof(QueuePair) * qp_objs_count;
CHECK_HIP(hipMalloc(&gpu_qps, qp_objs_mem_size));
@@ -726,7 +715,7 @@ void GDABackend::setup_gpu_qps() {
host_qps = (QueuePair*) malloc(qp_objs_mem_size);
CHECK_NNULL(host_qps, "malloc (host_qps)");
for (int i = 0; i < qp_objs_count; i++) {
for (size_t i = 0; i < qp_objs_count; i++) {
new (&host_qps[i]) QueuePair(pd_orig, gda_vendor);
CHECK_HIP(hipMemcpy(&gpu_qps[i], &host_qps[i], sizeof(QueuePair), hipMemcpyDefault));
@@ -741,9 +730,9 @@ void GDABackend::setup_gpu_qps() {
void GDABackend::cleanup_gpu_qps() {
size_t qp_objs_count;
qp_objs_count = (maximum_num_contexts_ + 1) * num_pes;
qp_objs_count = (envvar::max_num_contexts + 1) * num_pes;
for (int i = 0; i < qp_objs_count; i++) {
for (size_t i = 0; i < qp_objs_count; i++) {
host_qps[i].~QueuePair();
}
@@ -919,15 +908,15 @@ void GDABackend::modify_qps_rtr_to_rts() {
void GDABackend::create_queues() {
int ncqes;
int resize_length;
size_t resize_length;
if (gda_vendor == GDAVendor::IONIC) {
ncqes = sq_size << 1;
ncqes = envvar::sq_size << 1;
} else {
ncqes = sq_size;
ncqes = envvar::sq_size;
}
resize_length = (maximum_num_contexts_ + 1) * num_pes;
resize_length = (envvar::max_num_contexts + 1) * num_pes;
dest_info.resize(resize_length);
cqs.resize(resize_length);
@@ -938,25 +927,25 @@ void GDABackend::create_queues() {
if (gda_vendor == GDAVendor::BNXT) {
bnxt_create_cqs(ncqes);
bnxt_create_qps(sq_size);
bnxt_create_qps(envvar::sq_size);
} else {
create_cqs(ncqes);
create_qps(sq_size);
create_qps(envvar::sq_size);
}
alternate_qp_ports();
}
void GDABackend::alternate_qp_ports() {
int cur_qp_idx;
int new_qp_idx;
size_t cur_qp_idx;
size_t new_qp_idx;
/* We can't remap anything */
if (maximum_num_contexts_ == 1) {
if (envvar::max_num_contexts == 1) {
return;
}
if (alternate_qp_ports_enabled) {
if (envvar::gda::alternate_qp_ports) {
/* If we assume two PEs and a default context and two user context,
* initially QPs are in the following port order:
*
@@ -979,8 +968,8 @@ void GDABackend::alternate_qp_ports() {
*/
/* Re-Map each context */
for (int i = 1; i < (maximum_num_contexts_ + 1); i+=2) {
for (int p = 0; p < num_pes; p+=2) {
for (size_t i = 1; i < (envvar::max_num_contexts + 1); i += 2) {
for (size_t p = 0; p < num_pes; p += 2) {
cur_qp_idx = (i * num_pes) + p;
new_qp_idx = cur_qp_idx + 1;
+1 -8
Просмотреть файл
@@ -89,7 +89,7 @@ class GDABackend : public Backend {
union ibv_gid gid;
} dest_info_t;
char *requested_dev = nullptr;
const char *requested_dev = nullptr;
struct ibv_context *context = nullptr;;
struct ibv_pd *pd_orig = nullptr;
enum GDAVendor gda_vendor = GDAVendor::NONE;
@@ -102,14 +102,12 @@ class GDABackend : public Backend {
uint32_t *heap_rkey = nullptr;
struct ibv_mr *heap_mr = nullptr;
uint32_t sq_size = 1024;
uint32_t inline_threshold = 8;
QueuePair *host_qps = nullptr;
QueuePair *gpu_qps = nullptr;
std::vector<ibv_qp*> qps;
std::vector<ibv_cq*> cqs;
std::vector<dest_info_t> dest_info;
int alternate_qp_ports_enabled = 1;;
/* GDA_BNXT START */
std::vector<struct bnxt_host_qp> bnxt_qps;
@@ -422,11 +420,6 @@ class GDABackend : public Backend {
*/
FreeListProxy<HIPAllocator, GDAContext *> ctx_free_list{};
/**
* @brief Holds maximum number of contexts used in library
*/
size_t maximum_num_contexts_{32};
/**
* @brief The bitmask representing the availability of teams in the pool
*/
+2 -2
Просмотреть файл
@@ -691,7 +691,7 @@ namespace rocshmem
}
int GetClosestNicToGpu(int gpuIndex, char** dev_name)
int GetClosestNicToGpu(int gpuIndex, const char** dev_name)
{
static bool isInitialized = false;
static std::vector<int> closestNicId;
@@ -771,7 +771,7 @@ namespace rocshmem
DPRINTF("GPU Device id: %d closest NIC id : %d name: %s\n", gpuIndex, closestNicId[gpuIndex],
ibvDeviceList[closestNicId[gpuIndex]].name.c_str());
if (dev_name != NULL) {
if (dev_name != nullptr) {
*dev_name = strdup(ibvDeviceList[closestNicId[gpuIndex]].name.c_str());
}
+1 -1
Просмотреть файл
@@ -162,7 +162,7 @@ namespace rocshmem
* @param[out] dev_name Name of of IB Verbs capable NIC index closest to GPU gpuIndex
* @returns index of IB Verbs capable NIC index closest to GPU gpuIndex, or -1 if unable to detect
*/
int GetClosestNicToGpu(int gpuIndex, char **dev_name);
int GetClosestNicToGpu(int gpuIndex, const char** dev_name);
/**
* Returns information about number of available Devices
+8 -17
Просмотреть файл
@@ -25,6 +25,7 @@
#include "host.hpp"
#include "rocshmem/rocshmem_config.h" // NOLINT(build/include_subdir)
#include "envvar.hpp"
#include "host_helpers.hpp"
#include "memory/window_info.hpp"
#include "util.hpp"
@@ -69,7 +70,7 @@ __host__ void HostInterface::release_window_context(WindowInfo* window_info) {
}
int HostInterface::find_avail_pool_entry() {
for (int i{0}; i < max_num_ctxs_; i++) {
for (size_t i = 0; i < envvar::max_num_host_contexts; i++) {
if (host_window_context_pool_[i]->is_avail()) {
return i;
}
@@ -78,7 +79,7 @@ int HostInterface::find_avail_pool_entry() {
}
int HostInterface::find_win_info_in_pool(WindowInfo* window_info) {
for (int i{0}; i < max_num_ctxs_; i++) {
for (size_t i = 0; i < envvar::max_num_host_contexts; i++) {
if (host_window_context_pool_[i]->is_avail()) {
continue;
}
@@ -109,16 +110,11 @@ __host__ HostInterface::HostInterface(HdpPolicy* hdp_policy,
/*
* Allocate and initialize pool of windows for contexts
*/
char* value{nullptr};
if ((value = getenv("ROCSHMEM_MAX_NUM_HOST_CONTEXTS"))) {
max_num_ctxs_ = atoi(value);
}
size_t pool_size = max_num_ctxs_ * sizeof(HostContextWindowInfo*);
size_t pool_size = envvar::max_num_host_contexts * sizeof(HostContextWindowInfo*);
host_window_context_pool_ =
reinterpret_cast<HostContextWindowInfo**>(malloc(pool_size));
for (int ctx_i = 0; ctx_i < max_num_ctxs_; ctx_i++) {
for (size_t ctx_i = 0; ctx_i < envvar::max_num_host_contexts; ctx_i++) {
host_window_context_pool_[ctx_i] =
new HostContextWindowInfo(host_comm_world_, heap);
}
@@ -164,16 +160,11 @@ __host__ HostInterface::HostInterface(HdpPolicy* hdp_policy,
/*
* Allocate and initialize pool of windows for contexts
*/
char* value{nullptr};
if ((value = getenv("ROCSHMEM_MAX_NUM_HOST_CONTEXTS"))) {
max_num_ctxs_ = atoi(value);
}
size_t pool_size = max_num_ctxs_ * sizeof(HostContextWindowInfo*);
size_t pool_size = envvar::max_num_host_contexts * sizeof(HostContextWindowInfo*);
host_window_context_pool_ =
reinterpret_cast<HostContextWindowInfo**>(malloc(pool_size));
for (int ctx_i = 0; ctx_i < max_num_ctxs_; ctx_i++) {
for (size_t ctx_i = 0; ctx_i < envvar::max_num_host_contexts; ctx_i++) {
host_window_context_pool_[ctx_i] =
new HostContextWindowInfo(heap);
}
@@ -194,7 +185,7 @@ __host__ HostInterface::~HostInterface() {
/* Detroy the pool of contexts */
if (host_window_context_pool_ != nullptr) {
for (int ctx_i = 0; ctx_i < max_num_ctxs_; ctx_i++) {
for (size_t ctx_i = 0; ctx_i < envvar::max_num_host_contexts; ctx_i++) {
delete host_window_context_pool_[ctx_i];
}
free(host_window_context_pool_);
-5
Просмотреть файл
@@ -349,11 +349,6 @@ class HostInterface {
MPI_Win hdp_win;
#endif // USE_HDP_FLUSH
/**
* @brief Max number of contexts for the application
*/
int max_num_ctxs_{1};
/**
* @brief Pool of HostContexWindowInfos
*/
+4 -8
Просмотреть файл
@@ -29,8 +29,9 @@
#include <cassert>
#include "backend_ipc.hpp"
#include "mpi_instance.hpp"
#include "envvar.hpp"
#include "ipc_team.hpp"
#include "mpi_instance.hpp"
namespace rocshmem {
@@ -105,11 +106,6 @@ IPCBackend::IPCBackend(TcpBootstrap *bootstrap): Backend(bootstrap) {
}
void IPCBackend::init() {
if (auto maximum_num_contexts_str = getenv("ROCSHMEM_MAX_NUM_CONTEXTS")) {
std::stringstream sstream(maximum_num_contexts_str);
sstream >> maximum_num_contexts_;
}
ROCSHMEM_HOST_CTX_DEFAULT.ctx_opaque = default_host_ctx.get();
setup_team_world();
@@ -144,9 +140,9 @@ IPCBackend::~IPCBackend() {
}
void IPCBackend::setup_ctxs() {
CHECK_HIP(hipMalloc(&ctx_array, sizeof(IPCContext) * maximum_num_contexts_));
CHECK_HIP(hipMalloc(&ctx_array, sizeof(IPCContext) * envvar::max_num_contexts));
// 0th context is default context
for (size_t i = 0; i < maximum_num_contexts_; i++) {
for (size_t i = 0; i < envvar::max_num_contexts; i++) {
new (&ctx_array[i]) IPCContext(this, i + 1);
ctx_free_list.get()->push_back(ctx_array + i);
}
-5
Просмотреть файл
@@ -236,11 +236,6 @@ class IPCBackend : public Backend {
*/
FreeListProxy<HIPAllocator, IPCContext *> ctx_free_list{};
/**
* @brief Holds maximum number of contexts used in library
*/
size_t maximum_num_contexts_{1024};
/**
* @brief The bitmask representing the availability of teams in the pool
*/
+3 -2
Просмотреть файл
@@ -27,6 +27,7 @@
#include "rocshmem/rocshmem_config.h" // NOLINT(build/include_subdir)
#include "backend_bc.hpp"
#include "context_incl.hpp"
#include "envvar.hpp"
#include "util.hpp"
namespace rocshmem {
@@ -107,7 +108,7 @@ __host__ void IpcOnImpl::ipcHostInit(int my_pe, const HEAP_BASES_T &heap_bases,
*/
free(vec_ipc_handle);
if (0 == rocshmem_env_.get_disable_ipc()) {
if (!envvar::ro::disable_ipc) {
int thread_comm_rank {-1};
CHECK_HIP(hipMalloc(reinterpret_cast<void**>(&pes_with_ipc_avail), shm_size * sizeof(int)));
@@ -186,7 +187,7 @@ __host__ void IpcOnImpl::ipcHostInit(int my_pe, const HEAP_BASES_T &heap_bases,
*/
free(vec_ipc_handle);
if (0 == rocshmem_env_.get_disable_ipc()) {
if (!envvar::ro::disable_ipc) {
int thread_comm_rank {-1};
CHECK_HIP(hipMalloc(reinterpret_cast<void**>(&pes_with_ipc_avail), shm_size * sizeof(int)));
+1 -9
Просмотреть файл
@@ -28,15 +28,7 @@
namespace rocshmem {
SingleHeap::SingleHeap() {
if (auto heap_size_cstr = getenv("ROCSHMEM_HEAP_SIZE")) {
std::stringstream sstream(heap_size_cstr);
size_t heap_size;
sstream >> heap_size;
heap_mem_ = HEAP_T{heap_size};
strat_ = STRAT_T{&heap_mem_};
}
}
SingleHeap::SingleHeap() { }
void SingleHeap::malloc(void** ptr, size_t size) {
strat_.alloc(reinterpret_cast<char**>(ptr), size);
+2 -1
Просмотреть файл
@@ -25,6 +25,7 @@
#ifndef LIBRARY_SRC_MEMORY_SINGLE_HEAP_HPP_
#define LIBRARY_SRC_MEMORY_SINGLE_HEAP_HPP_
#include "envvar.hpp"
#include "heap_memory.hpp"
#include "heap_type.hpp"
#if defined USE_ALLOC_DLMALLOC
@@ -163,7 +164,7 @@ class SingleHeap {
/**
* @brief Heap memory object
*/
HEAP_T heap_mem_{};
HEAP_T heap_mem_{envvar::heap_size};
/**
* @brief Allocation strategy object
+17 -25
Просмотреть файл
@@ -38,6 +38,7 @@
#include "atomic_return.hpp"
#include "backend_type.hpp"
#include "context_incl.hpp"
#include "envvar.hpp"
#include "mpi_transport.hpp"
#include "ro_net_team.hpp"
#include "util.hpp"
@@ -50,13 +51,9 @@ ROBackend::ROBackend(MPI_Comm comm)
: Backend(comm) {
type = BackendType::RO_BACKEND;
if (auto maximum_num_contexts_str = getenv("ROCSHMEM_MAX_NUM_CONTEXTS")) {
std::stringstream sstream(maximum_num_contexts_str);
sstream >> maximum_num_contexts_;
}
poll_block_count_ = maximum_num_contexts_;
poll_block_count_ = envvar::max_num_contexts;
profiler_proxy_ = ProfilerProxyT(maximum_num_contexts_);
profiler_proxy_ = ProfilerProxyT(envvar::max_num_contexts);
int device_id;
hipDeviceProp_t device_props;
@@ -70,7 +67,7 @@ ROBackend::ROBackend(MPI_Comm comm)
setup_default_ctx_buffers();
size_t num_buff_elems = maximum_num_contexts_ * max_wg_size_;
size_t num_buff_elems = envvar::max_num_contexts * max_wg_size_;
g_ret_buffer_ = RetBufferProxyT(num_buff_elems);
@@ -78,7 +75,7 @@ ROBackend::ROBackend(MPI_Comm comm)
status_ = StatusProxyT(num_buff_elems);
queue_ = Queue(maximum_num_contexts_, queue_size_);
queue_ = Queue(envvar::max_num_contexts, queue_size_);
transport_ = new MPITransport(backend_comm, &queue_);
num_pes = transport_->getNumPes();
@@ -101,7 +98,7 @@ ROBackend::ROBackend(MPI_Comm comm)
initIPC();
transport_->initTransport(maximum_num_contexts_, &backend_proxy);
transport_->initTransport(envvar::max_num_contexts, &backend_proxy);
host_interface = transport_->host_interface;
@@ -129,7 +126,7 @@ ROBackend::ROBackend(MPI_Comm comm)
block_handle_proxy_ = BlockHandleProxyT(g_ret_buffer_.get(),
atomic_ret_buffer_.get(), &queue_,
max_wg_size_, status_.get(), maximum_num_contexts_);
max_wg_size_, status_.get(), envvar::max_num_contexts);
setup_ctxs();
worker_thread = std::thread(&ROBackend::ro_net_poll, this);
@@ -138,20 +135,15 @@ ROBackend::ROBackend(MPI_Comm comm)
}
void ROBackend::setup_ctxs() {
CHECK_HIP(hipMalloc(&ctx_array, sizeof(ROContext) * maximum_num_contexts_));
for (int i = 0; i < maximum_num_contexts_; i++) {
CHECK_HIP(hipMalloc(&ctx_array, sizeof(ROContext) * envvar::max_num_contexts));
for (size_t i = 0; i < envvar::max_num_contexts; i++) {
new (&ctx_array[i]) ROContext(this, i);
ctx_free_list.get()->push_back(ctx_array + i);
}
}
void ROBackend::setup_default_ctx_buffers() {
if (auto maximum_wf_buffers_str = getenv("ROCSHMEM_MAX_WF_BUFFERS")) {
std::stringstream sstream(maximum_wf_buffers_str);
sstream >> max_wavefront_buffers_;
}
size_t num_buff_elems = max_wavefront_buffers_ * wf_size_;
size_t num_buff_elems = envvar::max_wavefront_buffers * wf_size_;
g_ret_buffer_default_ctx_ = RetBufferProxyT(num_buff_elems);
@@ -159,17 +151,17 @@ void ROBackend::setup_default_ctx_buffers() {
status_default_ctx_ = StatusProxyT(num_buff_elems);
default_ctx_status_.get()->allocate_queue(max_wavefront_buffers_);
default_ctx_g_ret_buffer_.get()->allocate_queue(max_wavefront_buffers_);
default_ctx_atomic_ret_buffer_.get()->allocate_queue(max_wavefront_buffers_);
default_ctx_status_.get()->allocate_queue(envvar::max_wavefront_buffers);
default_ctx_g_ret_buffer_.get()->allocate_queue(envvar::max_wavefront_buffers);
default_ctx_atomic_ret_buffer_.get()->allocate_queue(envvar::max_wavefront_buffers);
char* status = status_default_ctx_.get();
uint64_t* g_ret_buf = g_ret_buffer_default_ctx_.get();
uint64_t* atomic_ret_buf = atomic_ret_buffer_default_ctx_.get();
for (int i{0}; i < max_wavefront_buffers_; i++) {
int offset {i * wf_size_};
for (size_t i = 0; i < envvar::max_wavefront_buffers; i++) {
size_t offset = i * wf_size_;
default_ctx_status_.get()->push(status + offset);
default_ctx_g_ret_buffer_.get()->push(g_ret_buf + offset);
default_ctx_atomic_ret_buffer_.get()->push(atomic_ret_buf + offset);
@@ -233,7 +225,7 @@ void ROBackend::ctx_destroy(Context *ctx) {
void ROBackend::reset_backend_stats() {
auto *bp{backend_proxy.get()};
for (size_t i{0}; i < maximum_num_contexts_; i++) {
for (size_t i = 0; i < envvar::max_num_contexts; i++) {
bp->profiler[i].resetStats();
}
}
@@ -259,7 +251,7 @@ void ROBackend::dump_backend_stats() {
auto *bp{backend_proxy.get()};
for (size_t i{0}; i < maximum_num_contexts_; i++) {
for (size_t i = 0; i < envvar::max_num_contexts; i++) {
// Average latency as perceived from a thread
const ROStats &prof{bp->profiler[i]};
us_wait_slot += prof.getStat(WAITING_ON_SLOT) / gpu_frequency_mhz;
-14
Просмотреть файл
@@ -277,20 +277,6 @@ class ROBackend : public Backend {
*/
AtomicWFQueueProxy<HIPAllocator, uint64_t*> default_ctx_atomic_ret_buffer_{};
/**
* @brief Maximum number of wavefront buffer arrays supported in the default
* context.
*
* This value determines the size of the status flag, rocshmem_g return, and
* rocshmem atomic return buffers.
*/
size_t max_wavefront_buffers_{1024};
/**
* @brief Holds maximum number of contexts used in library
*/
size_t maximum_num_contexts_{1024};
/**
* @brief Holds maximum threads per work-group
*/
+2 -3
Просмотреть файл
@@ -32,6 +32,7 @@
#include "host/host.hpp"
#include "backend_ro.hpp"
#include "envvar.hpp"
#include "ro_net_team.hpp"
#include "util.hpp"
@@ -585,8 +586,6 @@ std::unique_ptr<MPI_Request[]> MPITransport::raw_requests() {
}
void MPITransport::progress() {
static int progress_delay = rocshmem_env_.get_ro_progress_delay();
if (requests.size() == 0) {
const int tag{1000};
int flag{0};
@@ -594,7 +593,7 @@ void MPITransport::progress() {
// Slowing the progress engine down a bit avoid hammering the memory subsystem.
// This leads to significant performance benefits
usleep (progress_delay);
usleep(envvar::ro::progress_delay);
NET_CHECK(mpilib_ftable_.Iprobe(MPI_ANY_SOURCE, tag, ro_net_comm_world, &flag, &status));
} else {
DPRINTF("Testing all outstanding requests (%zu)\n", requests.size());
+6 -18
Просмотреть файл
@@ -23,30 +23,18 @@
*****************************************************************************/
#include "queue.hpp"
#include "envvar.hpp"
#include "mpi_transport.hpp"
namespace rocshmem {
Queue::Queue() {
gpu_queue = true;
char *value{nullptr};
if ((value = getenv("RO_NET_CPU_QUEUE")) != nullptr) {
gpu_queue = false;
}
}
Queue::Queue() { }
Queue::Queue(size_t max_queues, size_t queue_size)
: max_queues_{max_queues},
queue_size_{queue_size},
queue_proxy_{max_queues, queue_size},
queue_desc_proxy_{max_queues} {
gpu_queue = true;
char *value{nullptr};
if ((value = getenv("RO_NET_CPU_QUEUE")) != nullptr) {
gpu_queue = false;
}
}
queue_desc_proxy_{max_queues} { }
uint64_t Queue::get_read_index(uint64_t queue_index) {
return descriptor(queue_index)->read_index % queue_size_;
@@ -70,7 +58,7 @@ bool Queue::process(uint64_t queue_index, MPITransport* transport) {
queue_element* Queue::next_element(uint64_t queue_index) {
queue_element *next_elem{nullptr};
if (gpu_queue) {
if (!envvar::ro::net_cpu_queue) {
hdp_proxy_.get()->hdp_flush();
copy_element_to_cache(queue_index);
next_elem = queue_element_cache_proxy_.get();
@@ -90,13 +78,13 @@ void Queue::copy_element_to_cache(uint64_t queue_index) {
}
void Queue::flush_hdp() {
if (!gpu_queue) {
if (envvar::ro::net_cpu_queue) {
hdp_proxy_.get()->hdp_flush();
}
}
void Queue::sfence_flush_hdp() {
if (!gpu_queue) {
if (envvar::ro::net_cpu_queue) {
asm volatile("sfence" ::: "memory");
hdp_proxy_.get()->hdp_flush();
}
-2
Просмотреть файл
@@ -70,8 +70,6 @@ class Queue {
HdpProxy<HIPHostAllocator> hdp_proxy_{};
bool gpu_queue{false};
size_t max_queues_{};
size_t queue_size_{};
+3 -2
Просмотреть файл
@@ -35,6 +35,7 @@
#include "backend_bc.hpp"
#include "context_incl.hpp"
#include "envvar.hpp"
#if defined(USE_GDA)
#include "gda/backend_gda.hpp"
#include "gda/context_gda_tmpl_host.hpp"
@@ -229,9 +230,9 @@ rocshmem_ctx_t ROCSHMEM_HOST_CTX_DEFAULT;
assert (attr->rank < attr->nranks);
bootstr = new TcpBootstrap(attr->rank, attr->nranks);
bootstr->initialize(attr->uid, rocshmem_env_.get_bootstrap_timeout());
bootstr->initialize(attr->uid, envvar::bootstrap::timeout);
if (rocshmem_env_.get_uniqueid_with_mpi() ) {
if (envvar::uniqueid_with_mpi) {
library_init_subcomm(bootstr, attr->nranks, attr->rank);
} else {
library_init (bootstr);
+1 -6
Просмотреть файл
@@ -29,12 +29,7 @@
namespace rocshmem {
TeamTracker::TeamTracker() {
char* value{nullptr};
if ((value = getenv("ROCSHMEM_MAX_NUM_TEAMS"))) {
max_num_teams_ = atoi(value);
}
}
TeamTracker::TeamTracker() { }
void TeamTracker::track(rocshmem_team_t team) {
if (team == ROCSHMEM_TEAM_INVALID) {
+3 -2
Просмотреть файл
@@ -33,6 +33,7 @@
#include <vector>
#include "rocshmem/rocshmem.hpp"
#include "envvar.hpp"
namespace rocshmem {
@@ -93,7 +94,7 @@ class TeamTracker {
*
* @return number of teams supported by tracker
*/
__host__ __device__ int get_max_num_teams() { return max_num_teams_; }
__host__ __device__ int get_max_num_teams() { return max_num_teams_; } // TODO fix narrowing
/**
* @brief Get team world pointer
@@ -124,7 +125,7 @@ class TeamTracker {
* pre-allocate resources (e.g. LDS, working arrays, etc.)
* for teams.
*/
int max_num_teams_{40};
size_t max_num_teams_{envvar::max_num_teams};
/**
* @brief Pointer to implementation of ROCSHMEM_TEAM_WORLD
-75
Просмотреть файл
@@ -34,8 +34,6 @@ namespace rocshmem {
__constant__ int* print_lock;
rocshmem_env_config rocshmem_env_;
typedef struct device_agent {
hsa_agent_t agent;
hsa_amd_memory_pool_t pool;
@@ -123,77 +121,4 @@ void rocm_memory_lock_to_fine_grain(void* ptr, size_t size, void** gpu_ptr,
}
}
rocshmem_env_config::rocshmem_env_config() {
char* env_value = NULL;
env_value = getenv("ROCSHMEM_DISABLE_IPC");
if (NULL != env_value) {
disable_ipc = atoi(env_value);
}
// For backward compatibility, synonymous with ROCSHMEM_DISABLE_IPC
env_value = getenv("ROCSHMEM_RO_DISABLE_IPC");
if (NULL != env_value) {
disable_ipc = atoi(env_value);
}
env_value = getenv("ROCSHMEM_RO_PROGRESS_DELAY");
if (nullptr != env_value) {
ro_progress_delay = atoi(env_value);
}
env_value = getenv("ROCSHMEM_UNIQUEID_WITH_MPI");
if (nullptr != env_value) {
uniqueid_with_mpi = atoi(env_value);
}
env_value = getenv("ROCSHMEM_BOOTSTRAP_TIMEOUT");
if (nullptr != env_value) {
bootstrap_timeout = atoi(env_value);
}
env_value = getenv("ROCSHMEM_BOOTSTRAP_HOSTID");
if (nullptr != env_value) {
bootstrap_hostid = std::string(env_value);
}
env_value = getenv("ROCSHMEM_BOOTSTRAP_SOCKET_FAMILY");
if (nullptr != env_value) {
bootstrap_socket_family = std::string(env_value);
}
env_value = getenv("ROCSHMEM_BOOTSTRAP_SOCKET_IFNAME");
if (nullptr != env_value) {
bootstrap_socket_ifname = std::string(env_value);
}
}
int rocshmem_env_config::get_disable_ipc() {
return disable_ipc;
}
int rocshmem_env_config::get_ro_progress_delay() {
return ro_progress_delay;
}
int rocshmem_env_config::get_uniqueid_with_mpi() {
return uniqueid_with_mpi;
}
int rocshmem_env_config::get_bootstrap_timeout() {
return bootstrap_timeout;
}
std::string rocshmem_env_config::get_bootstrap_hostid() {
return bootstrap_hostid;
}
std::string rocshmem_env_config::get_bootstrap_socket_family() {
return bootstrap_socket_family;
}
std::string rocshmem_env_config::get_bootstrap_socket_ifname() {
return bootstrap_socket_ifname;
}
} // namespace rocshmem
-24
Просмотреть файл
@@ -393,30 +393,6 @@ int rocm_init();
void rocm_memory_lock_to_fine_grain(void* ptr, size_t size, void** gpu_ptr, int gpu_id);
class rocshmem_env_config {
public:
rocshmem_env_config();
int get_disable_ipc();
int get_ro_progress_delay();
int get_uniqueid_with_mpi();
int get_bootstrap_timeout();
std::string get_bootstrap_hostid();
std::string get_bootstrap_socket_family();
std::string get_bootstrap_socket_ifname();
private:
int disable_ipc = 0;
int ro_progress_delay = 3;
int bootstrap_timeout = 5;
int uniqueid_with_mpi = 0;
std::string bootstrap_hostid;
std::string bootstrap_socket_family;
std::string bootstrap_socket_ifname;
};
extern rocshmem_env_config rocshmem_env_;
} // namespace rocshmem
#endif // LIBRARY_SRC_UTIL_HPP_
+1
Просмотреть файл
@@ -63,6 +63,7 @@ target_sources(
free_list_gtest.cpp
wavefront_size_gtest.cpp
atomic_wf_queue_gtest.cpp
envvar_gtest.cpp
)
if (USE_IPC)
+283
Просмотреть файл
@@ -0,0 +1,283 @@
/******************************************************************************
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
*
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*****************************************************************************/
#include <limits>
#include "envvar_gtest.hpp"
using namespace rocshmem;
using VarTypes = ::testing::Types<bool,
uint32_t,
size_t,
int64_t,
std::string,
envvar::types::socket_family,
envvar::types::debug_level>;
TYPED_TEST_SUITE(EnvVarUnsetTestFixture, VarTypes);
TYPED_TEST_SUITE(EnvVarSetTestFixture, VarTypes);
TYPED_TEST(EnvVarUnsetTestFixture, name) {
EXPECT_EQ(this->var_.get_name(), this->var_full_name_);
}
TYPED_TEST(EnvVarUnsetTestFixture, doc) {
EXPECT_EQ(this->var_.get_doc(), this->var_doc_);
}
TYPED_TEST(EnvVarUnsetTestFixture, is_default) {
EXPECT_TRUE(this->var_.is_default());
EXPECT_EQ(this->var_.get_value(), this->var_.get_default());
}
TYPED_TEST(EnvVarSetTestFixture, is_default) {
EXPECT_FALSE(this->var_.is_default());
EXPECT_EQ(this->var_.get_value(), this->var_.get_default());
}
TEST_F(EnvVarTestFixture, string_custom_default) {
const std::string default_value_{"This is the default value."};
const envvar::var<std::string> var_{this->var_name_, this->var_doc_, default_value_};
EXPECT_TRUE(var_.is_default());
EXPECT_EQ(var_.get_default(), default_value_);
}
TEST_F(EnvVarTestFixture, parse_integer) {
this->setenv("1073741824");
const envvar::var<int64_t> var_{this->var_name_, this->var_doc_};
EXPECT_FALSE(var_.is_default());
EXPECT_EQ(var_.get_default(), 0);
EXPECT_EQ(var_.get_value(), 1L << 30);
}
TEST_F(EnvVarTestFixture, parse_integer_notaninteger) {
this->setenv("Ceci n'est pas un entier.");
const envvar::var<int64_t> var_{this->var_name_, this->var_doc_, 1L << 30};
EXPECT_TRUE(var_.is_default());
EXPECT_EQ(var_.get_value(), 1L << 30);
}
TEST_F(EnvVarTestFixture, parse_integer_large) {
this->setenv("9223372036854775807");
const envvar::var<int64_t> var_{this->var_name_, this->var_doc_};
EXPECT_FALSE(var_.is_default());
EXPECT_EQ(var_.get_default(), 0);
EXPECT_EQ(var_.get_value(), std::numeric_limits<int64_t>::max());
}
TEST_F(EnvVarTestFixture, parse_integer_too_large) {
this->setenv("9223372036854775808");
const envvar::var<int64_t> var_{this->var_name_, this->var_doc_};
EXPECT_TRUE(var_.is_default());
EXPECT_EQ(var_.get_default(), 0);
EXPECT_EQ(var_.get_value(), var_.get_default());
}
TEST_F(EnvVarTestFixture, parse_integer_negative) {
this->setenv("-1073741824");
const envvar::var<int64_t> var_{this->var_name_, this->var_doc_};
EXPECT_FALSE(var_.is_default());
EXPECT_EQ(var_.get_default(), 0);
EXPECT_EQ(var_.get_value(), -1L << 30);
}
TEST_F(EnvVarTestFixture, parse_integer_negative_large) {
this->setenv("-9223372036854775808");
const envvar::var<int64_t> var_{this->var_name_, this->var_doc_};
EXPECT_FALSE(var_.is_default());
EXPECT_EQ(var_.get_default(), 0);
EXPECT_EQ(var_.get_value(), std::numeric_limits<int64_t>::min());
}
TEST_F(EnvVarTestFixture, parse_integer_negative_too_large) {
this->setenv("-9223372036854775809");
const envvar::var<int64_t> var_{this->var_name_, this->var_doc_};
EXPECT_TRUE(var_.is_default());
EXPECT_EQ(var_.get_default(), 0);
EXPECT_EQ(var_.get_value(), var_.get_default());
}
TEST_F(EnvVarTestFixture, parse_integer_hex) {
this->setenv("0x40000000");
const envvar::var<int64_t> var_{this->var_name_, this->var_doc_};
EXPECT_FALSE(var_.is_default());
EXPECT_EQ(var_.get_default(), 0);
EXPECT_EQ(var_.get_value(), 1L << 30);
}
TEST_F(EnvVarTestFixture, parse_integer_hex_only) {
this->setenv("0x40000000");
const envvar::var<int64_t> var_{this->var_name_, this->var_doc_,
envvar::parser::parse_hex<int64_t>{}};
EXPECT_FALSE(var_.is_default());
EXPECT_EQ(var_.get_default(), 0);
EXPECT_EQ(var_.get_value(), 1L << 30);
}
// parse_hex<> interprets input digits as hexadecimal, even without 0x prefix
TEST_F(EnvVarTestFixture, parse_integer_hex_only_noprefix) {
this->setenv("40000000");
const envvar::var<int64_t> var_{this->var_name_, this->var_doc_,
envvar::parser::parse_hex<int64_t>{}};
EXPECT_FALSE(var_.is_default());
EXPECT_EQ(var_.get_default(), 0);
EXPECT_EQ(var_.get_value(), 1L << 30);
}
TEST_F(EnvVarTestFixture, parse_unsigned_integer) {
this->setenv("1073741824");
const envvar::var<uint32_t> var_{this->var_name_, this->var_doc_};
EXPECT_FALSE(var_.is_default());
EXPECT_EQ(var_.get_default(), 0);
EXPECT_EQ(var_.get_value(), 1L << 30);
}
TEST_F(EnvVarTestFixture, parse_unsigned_integer_negative) {
this->setenv("-1");
const envvar::var<uint32_t> var_{this->var_name_, this->var_doc_};
EXPECT_TRUE(var_.is_default());
EXPECT_EQ(var_.get_default(), 0);
EXPECT_EQ(var_.get_value(), var_.get_default());
}
TEST_F(EnvVarTestFixture, parse_unsigned_large) {
this->setenv("4294967296");
const envvar::var<uint32_t> var_{this->var_name_, this->var_doc_};
EXPECT_TRUE(var_.is_default());
EXPECT_EQ(var_.get_default(), 0);
EXPECT_EQ(var_.get_value(), var_.get_default());
}
TEST_F(EnvVarTestFixture, parse_bool_zero) {
this->setenv("0");
const envvar::var<bool> var_{this->var_name_, this->var_doc_};
EXPECT_FALSE(var_.is_default());
EXPECT_EQ(var_.get_default(), false);
EXPECT_EQ(var_.get_value(), false);
}
TEST_F(EnvVarTestFixture, parse_bool_one) {
this->setenv("1");
const envvar::var<bool> var_{this->var_name_, this->var_doc_};
EXPECT_FALSE(var_.is_default());
EXPECT_EQ(var_.get_default(), false);
EXPECT_EQ(var_.get_value(), true);
}
TEST_F(EnvVarTestFixture, parse_bool_negative) {
this->setenv("-1");
const envvar::var<bool> var_{this->var_name_, this->var_doc_};
EXPECT_TRUE(var_.is_default());
EXPECT_EQ(var_.get_default(), false);
EXPECT_EQ(var_.get_value(), false);
}
TEST_F(EnvVarTestFixture, parse_bool_two) {
this->setenv("2");
const envvar::var<bool> var_{this->var_name_, this->var_doc_};
EXPECT_TRUE(var_.is_default());
EXPECT_EQ(var_.get_default(), false);
EXPECT_EQ(var_.get_value(), false);
}
TEST_F(EnvVarTestFixture, parse_bool_false) {
this->setenv("false");
const envvar::var<bool> var_{this->var_name_, this->var_doc_};
EXPECT_FALSE(var_.is_default());
EXPECT_EQ(var_.get_default(), false);
EXPECT_EQ(var_.get_value(), false);
}
TEST_F(EnvVarTestFixture, parse_bool_true) {
this->setenv("true");
const envvar::var<bool> var_{this->var_name_, this->var_doc_};
EXPECT_FALSE(var_.is_default());
EXPECT_EQ(var_.get_default(), false);
EXPECT_EQ(var_.get_value(), true);
}
TEST_F(EnvVarTestFixture, parse_bool_other) {
this->setenv("other");
const envvar::var<bool> var_{this->var_name_, this->var_doc_};
EXPECT_TRUE(var_.is_default());
EXPECT_EQ(var_.get_default(), false);
EXPECT_EQ(var_.get_value(), false);
}
TEST_F(EnvVarTestFixture, parse_socket_family_unspec) {
this->setenv("UNSPEC");
const envvar::var<envvar::types::socket_family> var_{this->var_name_, this->var_doc_};
EXPECT_FALSE(var_.is_default());
EXPECT_EQ(var_.get_default(), envvar::types::socket_family::UNSPEC);
EXPECT_EQ(var_.get_value(), envvar::types::socket_family::UNSPEC);
}
TEST_F(EnvVarTestFixture, parse_socket_family_af_unspec) {
this->setenv("AF_UNSPEC");
const envvar::var<envvar::types::socket_family> var_{this->var_name_, this->var_doc_};
EXPECT_FALSE(var_.is_default());
EXPECT_EQ(var_.get_default(), envvar::types::socket_family::UNSPEC);
EXPECT_EQ(var_.get_value(), envvar::types::socket_family::UNSPEC);
}
TEST_F(EnvVarTestFixture, parse_socket_family_inet) {
this->setenv("INET");
const envvar::var<envvar::types::socket_family> var_{this->var_name_, this->var_doc_};
EXPECT_FALSE(var_.is_default());
EXPECT_EQ(var_.get_default(), envvar::types::socket_family::UNSPEC);
EXPECT_EQ(var_.get_value(), envvar::types::socket_family::INET);
}
TEST_F(EnvVarTestFixture, parse_socket_family_af_inet) {
this->setenv("AF_INET");
const envvar::var<envvar::types::socket_family> var_{this->var_name_, this->var_doc_};
EXPECT_FALSE(var_.is_default());
EXPECT_EQ(var_.get_default(), envvar::types::socket_family::UNSPEC);
EXPECT_EQ(var_.get_value(), envvar::types::socket_family::INET);
}
TEST_F(EnvVarTestFixture, parse_socket_family_inet6) {
this->setenv("INET6");
const envvar::var<envvar::types::socket_family> var_{this->var_name_, this->var_doc_};
EXPECT_FALSE(var_.is_default());
EXPECT_EQ(var_.get_default(), envvar::types::socket_family::UNSPEC);
EXPECT_EQ(var_.get_value(), envvar::types::socket_family::INET6);
}
TEST_F(EnvVarTestFixture, parse_socket_family_af_inet6) {
this->setenv("AF_INET6");
const envvar::var<envvar::types::socket_family> var_{this->var_name_, this->var_doc_};
EXPECT_FALSE(var_.is_default());
EXPECT_EQ(var_.get_default(), envvar::types::socket_family::UNSPEC);
EXPECT_EQ(var_.get_value(), envvar::types::socket_family::INET6);
}
TEST_F(EnvVarTestFixture, parse_socket_family_nonesense) {
this->setenv("'Twas brillig, and the slithy toves, did gyre and gimble in the wabe.");
const envvar::var<envvar::types::socket_family> var_{this->var_name_, this->var_doc_};
EXPECT_TRUE(var_.is_default());
EXPECT_EQ(var_.get_default(), envvar::types::socket_family::UNSPEC);
EXPECT_EQ(var_.get_value(), var_.get_default());
}
+97
Просмотреть файл
@@ -0,0 +1,97 @@
/******************************************************************************
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
*
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*****************************************************************************/
#ifndef ROCSHMEM_ENVVAR_GTEST_HPP
#define ROCSHMEM_ENVVAR_GTEST_HPP
#include <cstdlib>
#include <sstream>
#include <string>
#include "gtest/gtest.h"
#include "../src/envvar.hpp"
namespace rocshmem {
class EnvVarTestFixture : public ::testing::Test
{
public:
static constexpr envvar::category::tag category_ = envvar::category::tag::ROCSHMEM;
static constexpr const char* var_name_ = "GTEST";
static constexpr const char* var_prefix_ = envvar::category::prefix<category_>;
static inline const std::string var_full_name_{std::string(var_prefix_).append("_").append(var_name_)};
static constexpr const char* var_doc_ = "Test envvar documentation: documents the test envvar.";
static int setenv(const char* value) {
return ::setenv(var_full_name_.c_str(), value, true);
}
static int setenv(const std::string& value) {
return setenv(value.c_str());
}
static int unsetenv() {
return ::unsetenv(var_full_name_.c_str());
}
protected:
static void SetUpTestSuite() {
unsetenv();
}
static void TearDownTestSuite() {
unsetenv();
}
void SetUp() override {
unsetenv();
}
void TearDown() override {
unsetenv();
}
};
template <typename T>
class EnvVarUnsetTestFixture : public EnvVarTestFixture
{
protected:
const envvar::var<T> var_{var_name_, var_doc_};
};
template <typename T>
class EnvVarSetTestFixture : public EnvVarTestFixture
{
protected:
const envvar::var<T> var_{var_name_, var_doc_};
static void SetUpTestSuite() {
std::ostringstream oss{};
oss << T{};
setenv(oss.str());
}
};
} // namespace rocshmem
#endif // ROCSHMEM_ENVVAR_GTEST_HPP