diff --git a/CHANGELOG.md b/CHANGELOG.md index 581c035814..9363388c98 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -155,6 +155,8 @@ Full documentation for ROCprofiler-SDK is available at [rocm.docs.amd.com/projec - Added support for rocDecode API Tracing - Added usage documentation for ROCTx - Added usage documentation for MPI applications +- SDK: `rocprofiler_agent_v0_t` support for agent UUIDs +- SDK: `rocprofiler_agent_v0_t` support for agent visibility based on gpu isolation environment variables (`ROCR_VISIBLE_DEVICES`, etc.) ### Changed diff --git a/source/docs/rocprofv3-schema.json b/source/docs/rocprofv3-schema.json index 581e13fdfa..faa9bcc22e 100644 --- a/source/docs/rocprofv3-schema.json +++ b/source/docs/rocprofv3-schema.json @@ -602,26 +602,13 @@ "type": "integer", "description": "ID of the code object." }, - "rocp_agent": { + "agent_id": { "type": "object", - "description": "ROCP agent information.", + "description": "Rocprofiler-SDK agent information.", "properties": { "handle": { "type": "integer", - "description": "Handle of the ROCP agent." - } - }, - "required": [ - "handle" - ] - }, - "hsa_agent": { - "type": "object", - "description": "HSA agent information.", - "properties": { - "handle": { - "type": "integer", - "description": "Handle of the HSA agent." + "description": "Handle of the Rocprofiler-SDK agent." } }, "required": [ diff --git a/source/include/rocprofiler-sdk/agent.h b/source/include/rocprofiler-sdk/agent.h index 50753eab2c..a1ece80627 100644 --- a/source/include/rocprofiler-sdk/agent.h +++ b/source/include/rocprofiler-sdk/agent.h @@ -97,6 +97,26 @@ typedef struct rocprofiler_agent_mem_bank_t uint64_t size_in_bytes; ///< physical memory size of the memory range in bytes } rocprofiler_agent_mem_bank_t; +/** + * @brief Provides an *estimate* about the runtime visibility of an agent based on the environment + * variables (ROCR_VISIBLE_DEVICES, HIP_VISIBLE_DEVICES, GPU_DEVICE_ORDINAL, CUDA_VISIBLE_DEVICES). 
+ * Reference: https://rocm.docs.amd.com/en/latest/conceptual/gpu-isolation.html + */ +typedef struct rocprofiler_agent_runtime_visiblity_t +{ + // Note: HSA == ROCR + uint32_t hsa : 1; ///< if not visible to HSA, agent not visible to anything built on HSA + uint32_t hip : 1; ///< Built on HSA + uint32_t rccl : 1; ///< Built on HIP + uint32_t rocdecode : 1; ///< Built on HIP + uint32_t reserved : 28; +} rocprofiler_agent_runtime_visiblity_t; + +ROCPROFILER_CXX_CODE( + static_assert( + sizeof(rocprofiler_agent_runtime_visiblity_t) == sizeof(uint32_t), + "Increasing the size of the rocprofiler_agent_runtime_visiblity_t is not permitted");) + /** * @brief Stores the properties of an agent (CPU, GPU, etc.) * @@ -180,20 +200,33 @@ typedef struct rocprofiler_agent_v0_t ///< dimension of a work-group. rocprofiler_dim3_t grid_max_dim; ///< GPU only. Maximum number of work-items of each dimension ///< of a grid. - const rocprofiler_agent_mem_bank_t* mem_banks; - const rocprofiler_agent_cache_t* caches; - const rocprofiler_agent_io_link_t* io_links; - const char* name; ///< Name of the agent. Will be identical to product name for CPU - const char* vendor_name; ///< Vendor of agent (will be AMD) - const char* product_name; ///< Marketing name - const char* model_name; ///< GPU only. Will be something like vega20, mi200, etc. - uint32_t node_id; ///< Node sequence number. This will be equivalent to the HSA-runtime - ///< HSA_AMD_AGENT_INFO_DRIVER_NODE_ID property - int32_t logical_node_id; ///< Logical sequence number. 
This will always be [0..N) where N is - ///< the total number of agents - int32_t logical_node_type_id; - int32_t reserved_padding0; ///< padding logical_node_id to 64 bytes + const rocprofiler_agent_mem_bank_t* mem_banks; + const rocprofiler_agent_cache_t* caches; + const rocprofiler_agent_io_link_t* io_links; + const char* name; + const char* vendor_name; ///< Vendor of agent (will be AMD) + const char* product_name; ///< Marketing name + const char* model_name; + uint32_t node_id; + int32_t logical_node_id; + int32_t logical_node_type_id; + rocprofiler_agent_runtime_visiblity_t runtime_visibility; + rocprofiler_uuid_t uuid; ///< GPU only. Universally unique identifier. + /// @var name + /// @brief Name of the agent. Will be identical to product name for CPU + /// + /// @var model_name + /// @brief GPU only. Will be something like vega20, mi200, etc. + /// + /// @var node_id + /// @brief Node sequence number. This will be equivalent to the HSA-runtime + /// HSA_AMD_AGENT_INFO_DRIVER_NODE_ID property + /// + /// @var logical_node_id + /// @brief Logical sequence number. This will always be [0..N) where N is the total number of + /// agents + /// /// @var logical_node_type_id /// @brief Logical sequence number with respect to other agents of same type. This will always /// be [0..N) where N is the total number of X agents (where X is a ::rocprofiler_agent_type_t @@ -207,6 +240,13 @@ typedef struct rocprofiler_agent_v0_t /// then then CPU node_ids 0 and 2 would have logical_node_type_id values of 0 and 1, /// respectively, and GPU node_ids 1 and 3 would also have logical_node_type_id values of 0 /// and 1. + /// + /// @var runtime_visibility + /// @brief See ::rocprofiler_agent_runtime_visiblity_t and ::rocprofiler_runtime_library_t. This is an estimate about whether this agent + /// will be visible to the runtimes, e.g. if agent.runtime_visibility.hip != 0 then we believe + /// this agent will be visible to the HIP library. 
However, this is an + /// estimate and we cannot be certain until the HIP runtime is initialized. This will always be + /// true for CPU agents. } rocprofiler_agent_v0_t; typedef rocprofiler_agent_v0_t rocprofiler_agent_t; diff --git a/source/include/rocprofiler-sdk/callback_tracing.h b/source/include/rocprofiler-sdk/callback_tracing.h index 916386e21f..e4c181460b 100644 --- a/source/include/rocprofiler-sdk/callback_tracing.h +++ b/source/include/rocprofiler-sdk/callback_tracing.h @@ -124,22 +124,36 @@ typedef struct */ typedef struct { - uint64_t size; ///< size of this struct - uint64_t code_object_id; ///< unique code object identifier - rocprofiler_agent_id_t rocp_agent; ///< The agent on which this loaded code object is loaded - hsa_agent_t hsa_agent; ///< The agent on which this loaded code object is loaded - const char* uri; ///< The URI name from which the code object was loaded - uint64_t load_base; ///< The base memory address at which the code object is loaded. This is - ///< the base address of the allocation for the lowest addressed segment of - ///< the code object that is loaded. Note that any non-loaded segments - ///< before the first loaded segment are ignored. - uint64_t load_size; ///< The byte size of the loaded code objects contiguous memory allocation. - int64_t load_delta; ///< The signed byte address difference of the memory address at which the - ///< code object is loaded minus the virtual address specified in the code - ///< object that is loaded. - rocprofiler_code_object_storage_type_t - storage_type; ///< storage type of the code object reader used to load the loaded code - ///< object + uint64_t size; ///< size of this struct + uint64_t code_object_id; ///< unique code object identifier + union + { + rocprofiler_agent_id_t rocp_agent; ///< Deprecated. Renamed to agent_id + rocprofiler_agent_id_t agent_id; ///< The agent on which this loaded code object is loaded + }; + hsa_agent_t hsa_agent; ///< Deprecated. 
The agent on which this loaded code object is loaded + const char* uri; ///< The URI name from which the code object was loaded + uint64_t load_base; + uint64_t load_size; + int64_t load_delta; + rocprofiler_code_object_storage_type_t storage_type; + + /// @var load_base + /// @brief The base memory address at which the code object is loaded. This is the base address + /// of the allocation for the lowest addressed segment of the code object that is loaded. Note + /// that any non-loaded segments before the first loaded segment are ignored. + /// + /// @var load_size + /// @brief The byte size of the loaded code objects contiguous memory allocation. + /// + /// @var load_delta + /// @brief The signed byte address difference of the memory address at which the code object is + /// loaded minus the virtual address specified in the code object that is loaded. + /// + /// @var storage_type + /// @brief storage type of the code object reader used to load the loaded code object + /// + union { struct @@ -171,26 +185,47 @@ typedef struct * @brief ROCProfiler Code Object Kernel Symbol Tracer Callback Record. * */ -typedef struct +typedef struct rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t { - uint64_t size; ///< size of this struct - uint64_t kernel_id; ///< unique symbol identifier value - uint64_t code_object_id; ///< parent unique code object identifier - const char* kernel_name; ///< name of the kernel - uint64_t kernel_object; ///< kernel object handle, used in the kernel dispatch packet - uint32_t kernarg_segment_size; ///< size of memory (in bytes) allocated for kernel arguments. - ///< Will be multiple of 16 - uint32_t kernarg_segment_alignment; ///< Alignment (in bytes) of the buffer used to pass - ///< arguments to the kernel - uint32_t group_segment_size; ///< Size of static group segment memory required by the kernel - ///< (per work-group), in bytes. 
AKA: LDS size - uint32_t private_segment_size; ///< Size of static private, spill, and arg segment memory - ///< required by this kernel (per work-item), in bytes. AKA: - ///< scratch size - uint32_t sgpr_count; ///< Scalar general purpose register count - uint32_t arch_vgpr_count; ///< Architecture vector general purpose register count - uint32_t accum_vgpr_count; ///< Accum vector general purpose register count + uint64_t size; ///< size of this struct + uint64_t kernel_id; ///< unique symbol identifier value + uint64_t code_object_id; ///< parent unique code object identifier + const char* kernel_name; ///< name of the kernel + uint64_t kernel_object; + uint32_t kernarg_segment_size; + uint32_t kernarg_segment_alignment; + uint32_t group_segment_size; + uint32_t private_segment_size; + uint32_t sgpr_count; ///< Scalar general purpose register count + uint32_t arch_vgpr_count; ///< Architecture vector general purpose register count + uint32_t accum_vgpr_count; ///< Accum vector general purpose register count + int64_t kernel_code_entry_byte_offset; + rocprofiler_address_t kernel_address; + /// @var kernel_object + /// @brief kernel object handle, used in the kernel dispatch packet + /// + /// @var kernarg_segment_size + /// @brief size of memory (in bytes) allocated for kernel arguments. Will be multiple of 16 + /// + /// @var kernarg_segment_alignment + /// @brief Alignment (in bytes) of the buffer used to pass arguments to the kernel + /// + /// @var group_segment_size + /// @brief Size of static group segment memory required by the kernel (per work-group), in + /// bytes. AKA: LDS size + /// + /// @var private_segment_size + /// @brief Size of static private, spill, and arg segment memory required by this kernel (per + /// work-item), in bytes. AKA: scratch size + /// + /// @var kernel_code_entry_byte_offset + /// @brief Relative offset from kernel_object address to calculate the first address of a + /// kernel. 
+ /// + /// @var kernel_address + /// @brief The first address of a kernel. Useful for PC sampling. + /// } rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t; // rename struct diff --git a/source/include/rocprofiler-sdk/cxx/serialization.hpp b/source/include/rocprofiler-sdk/cxx/serialization.hpp index 09e059fcca..95effccb85 100644 --- a/source/include/rocprofiler-sdk/cxx/serialization.hpp +++ b/source/include/rocprofiler-sdk/cxx/serialization.hpp @@ -151,8 +151,7 @@ save(ArchiveT& ar, rocprofiler_callback_tracing_code_object_load_data_t data) { ROCP_SDK_SAVE_DATA_FIELD(size); ROCP_SDK_SAVE_DATA_FIELD(code_object_id); - ROCP_SDK_SAVE_DATA_FIELD(rocp_agent); - ROCP_SDK_SAVE_DATA_FIELD(hsa_agent); + ROCP_SDK_SAVE_DATA_FIELD(agent_id); ROCP_SDK_SAVE_DATA_CSTR(uri); ROCP_SDK_SAVE_DATA_FIELD(load_base); ROCP_SDK_SAVE_DATA_FIELD(load_size); @@ -185,6 +184,8 @@ save(ArchiveT& ar, rocprofiler_callback_tracing_code_object_kernel_symbol_regist ROCP_SDK_SAVE_DATA_FIELD(sgpr_count); ROCP_SDK_SAVE_DATA_FIELD(arch_vgpr_count); ROCP_SDK_SAVE_DATA_FIELD(accum_vgpr_count); + ROCP_SDK_SAVE_DATA_FIELD(kernel_code_entry_byte_offset); + ROCP_SDK_SAVE_DATA_FIELD(kernel_address); } template diff --git a/source/include/rocprofiler-sdk/cxx/utility.hpp b/source/include/rocprofiler-sdk/cxx/utility.hpp index 38d23f3f50..4bd94c337e 100644 --- a/source/include/rocprofiler-sdk/cxx/utility.hpp +++ b/source/include/rocprofiler-sdk/cxx/utility.hpp @@ -23,6 +23,7 @@ #pragma once +#include #include #include @@ -36,8 +37,14 @@ template auto as_hex(Tp val, size_t width = 0) { + uintptr_t _uintp_val = 0; + if constexpr(std::is_pointer::value) + _uintp_val = reinterpret_cast(val); + else + _uintp_val = val; + auto ss = std::stringstream{}; - ss << "0x" << std::hex << std::setfill('0') << std::setw(width) << val; + ss << "0x" << std::hex << std::setfill('0') << std::setw(width) << _uintp_val; return ss.str(); } } // namespace utility diff --git a/source/include/rocprofiler-sdk/fwd.h 
b/source/include/rocprofiler-sdk/fwd.h index a8f15fe0b8..fe3a23968b 100644 --- a/source/include/rocprofiler-sdk/fwd.h +++ b/source/include/rocprofiler-sdk/fwd.h @@ -532,6 +532,16 @@ typedef union rocprofiler_address_t void* ptr; ///< usage example: generic form of address } rocprofiler_address_t; +/** + * @brief Stores UUID for devices. + * + */ +typedef union rocprofiler_uuid_t +{ + uint64_t value; ///< numerical value + void* bytes; ///< uuid in hexadecimal +} rocprofiler_uuid_t; + //--------------------------------------------------------------------------------------// // // STRUCTS diff --git a/source/lib/common/environment.cpp b/source/lib/common/environment.cpp index 3ed760ff57..ab727b5666 100644 --- a/source/lib/common/environment.cpp +++ b/source/lib/common/environment.cpp @@ -146,6 +146,7 @@ SPECIALIZE_GET_ENV(uint64_t) SPECIALIZE_SET_ENV(const char*) SPECIALIZE_SET_ENV(std::string) +SPECIALIZE_SET_ENV(std::string_view) SPECIALIZE_SET_ENV(float) SPECIALIZE_SET_ENV(double) } // namespace impl diff --git a/source/lib/output/metadata.cpp b/source/lib/output/metadata.cpp index d1ce8581e0..61f1532d0c 100644 --- a/source/lib/output/metadata.cpp +++ b/source/lib/output/metadata.cpp @@ -75,7 +75,7 @@ query_pc_sampling_configuration(const rocprofiler_pc_sampling_configuration_t* c } // namespace kernel_symbol_info::kernel_symbol_info() -: base_type{0, 0, 0, "", 0, 0, 0, 0, 0, 0, 0, 0} +: base_type{0, 0, 0, "", 0, 0, 0, 0, 0, 0, 0, 0, 0, {.value = 0}} {} constexpr auto null_address_v = rocprofiler_address_t{.value = 0}; diff --git a/source/lib/rocprofiler-sdk/agent.cpp b/source/lib/rocprofiler-sdk/agent.cpp index eeadb2c515..a9948bedf8 100644 --- a/source/lib/rocprofiler-sdk/agent.cpp +++ b/source/lib/rocprofiler-sdk/agent.cpp @@ -20,19 +20,21 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. 
-#include -#include -#include - +#include "lib/rocprofiler-sdk/agent.hpp" +#include "lib/common/environment.hpp" #include "lib/common/filesystem.hpp" #include "lib/common/logging.hpp" #include "lib/common/scope_destructor.hpp" #include "lib/common/static_object.hpp" #include "lib/common/string_entry.hpp" #include "lib/common/utility.hpp" -#include "lib/rocprofiler-sdk/agent.hpp" #include "lib/rocprofiler-sdk/hsa/agent_cache.hpp" +#include +#include +#include +#include + #include #include #include @@ -42,6 +44,7 @@ #include #include +#include #include #include #include @@ -326,6 +329,216 @@ read_property(const MapT& data, const std::string& label, Tp& value) } } +void +update_agent_runtime_visibility(rocprofiler_agent_t& agent_info) +{ + // + // https://rocm.docs.amd.com/en/latest/conceptual/gpu-isolation.html + // + // + // ROCR_VISIBLE_DEVICES + // + // A list of device indices or UUIDs that will be exposed to applications. + // + // Runtime : ROCm Software Runtime. Applies to all applications using the user mode + // ROCm software stack. + // + // Example to expose the 1. device and a device based on UUID. + // export ROCR_VISIBLE_DEVICES="0,GPU-DEADBEEFDEADBEEF" + // + // GPU_DEVICE_ORDINAL + // Devices indices exposed to OpenCL and HIP applications. + // + // Runtime : ROCm Compute Language Runtime (ROCclr). Applies to applications and + // runtimes using the ROCclr abstraction layer including HIP and OpenCL applications. + // + // Example to expose the 1. and 3. device in the system. + // export GPU_DEVICE_ORDINAL="0,2" + // + // HIP_VISIBLE_DEVICES + // Device indices exposed to HIP applications. + // + // Runtime: HIP runtime. Applies only to applications using HIP on the AMD platform. + // + // Example to expose the 1. and 3. devices in the system. + // export HIP_VISIBLE_DEVICES="0,2" + // + // CUDA_VISIBLE_DEVICES + // Provided for CUDA compatibility, has the same effect as HIP_VISIBLE_DEVICES on the + // AMD platform. 
+ // + // Runtime : HIP or CUDA Runtime. Applies to HIP applications on the AMD or NVIDIA + // platform and CUDA applications. + // + // OMP_DEFAULT_DEVICE + // Default device used for OpenMP target offloading. + // + // Runtime : OpenMP Runtime. Applies only to applications using OpenMP offloading. + // + // Example on setting the default device to the third device. + // export OMP_DEFAULT_DEVICE="2" + // + + struct parse_result + { + bool value = false; + int32_t index = -1; + + operator bool() const { return (value && index >= 0); } + }; + + constexpr auto zero_visibility = rocprofiler_agent_runtime_visiblity_t{ + .hsa = 0, .hip = 0, .rccl = 0, .rocdecode = 0, .reserved = 0}; + constexpr auto full_visibility = rocprofiler_agent_runtime_visiblity_t{ + .hsa = 1, .hip = 1, .rccl = 1, .rocdecode = 1, .reserved = 0}; + + agent_info.runtime_visibility = zero_visibility; + + if(agent_info.type == ROCPROFILER_AGENT_TYPE_CPU) + { + agent_info.runtime_visibility = full_visibility; + } + else if(agent_info.type == ROCPROFILER_AGENT_TYPE_GPU) + { + auto set_hip_visibility = [&agent_info](bool is_hip_visible) { + if(is_hip_visible && agent_info.runtime_visibility.hsa == 0) + { + ROCP_WARNING << fmt::format("Attempt to enable hip visiblity for agent-{} which is " + "not visible to HSA (ROCR)", + agent_info.node_id); + return; + } + + ROCP_INFO << "agent-" << agent_info.node_id + << " :: HIP_VISIBLE_DEVICE = " << std::boolalpha << is_hip_visible; + agent_info.runtime_visibility.hip = is_hip_visible; + agent_info.runtime_visibility.rccl = is_hip_visible; + agent_info.runtime_visibility.rocdecode = is_hip_visible; + }; + + auto set_hsa_visibility = [&agent_info, &set_hip_visibility](bool is_hsa_visible) { + ROCP_INFO << "agent-" << agent_info.node_id + << " :: ROCR_VISIBLE_DEVICE = " << std::boolalpha << is_hsa_visible; + agent_info.runtime_visibility.hsa = is_hsa_visible; + if(!is_hsa_visible) set_hip_visibility(false); + }; + + auto parse_env_visible = 
[&agent_info](std::string_view env_varname, + int32_t env_node_id) -> std::optional { + constexpr auto uuid_prefix = std::string_view{"GPU-"}; + auto env_value = common::get_env(env_varname, ""); + if(env_value.empty()) return std::nullopt; + + ROCP_INFO << "Found visibility environment variable :: " << env_varname << " = " + << env_value; + int32_t idx = 0; + for(const auto& itr : rocprofiler::sdk::parse::tokenize(env_value, ", ")) + { + if(itr.empty()) continue; + + ROCP_TRACE << "Processing " << env_varname << " token: " << itr; + + auto _idx_v = idx++; + if(itr.find_first_not_of("0123456789") == std::string::npos) + { + auto _ordinal = std::stoll(itr); + if(_ordinal == env_node_id) return parse_result{true, _idx_v}; + } + else if(itr.find(uuid_prefix) == 0 && itr.length() > uuid_prefix.length()) + { + auto _uuid = + std::strtoull(itr.substr(uuid_prefix.length()).c_str(), nullptr, 16); + if(_uuid == agent_info.uuid.value) return parse_result{true, _idx_v}; + } + else + { + ROCP_CI_LOG(WARNING) + << fmt::format("Sequence '{}' in {}={} not recognized. Expected device " + "ordinal or GPU-XXX where XXX is the hexadecimal UUID", + itr, + env_varname, + env_value); + } + } + return parse_result{false, agent_info.logical_node_type_id}; + }; + + static_assert( + ROCPROFILER_LIBRARY_LAST == ROCPROFILER_ROCDECODE_LIBRARY, + "Since a new library was added to rocprofiler_runtime_library_t, please make sure " + "rocprofiler_agent_runtime_visiblity_t has an entry for this library (if " + "necessary) and make the necessary updates to the logic below has been updated"); + + std::string_view hip_visible_envvar = "HIP_VISIBLE_DEVICES"; + + auto rocr_visible = + parse_env_visible("ROCR_VISIBLE_DEVICES", agent_info.logical_node_type_id); + + auto rocr_index = + (rocr_visible && *rocr_visible) ? 
rocr_visible->index : agent_info.logical_node_type_id; + + ROCP_INFO << fmt::format("agent-{} (GPU {}) has a rocr index = {}", + agent_info.node_id, + agent_info.logical_node_type_id, + rocr_index); + + auto hip_visible = parse_env_visible(hip_visible_envvar, rocr_index); + + auto parse_hip_visible_alt = [&hip_visible, &agent_info, &rocr_index, &parse_env_visible]( + std::string_view env_primary, + std::string_view env_secondary) { + auto secondary_visible = parse_env_visible(env_secondary, rocr_index); + if(secondary_visible && !hip_visible) + { + hip_visible = secondary_visible; + return env_secondary; + } + else if(secondary_visible && hip_visible && *secondary_visible != *hip_visible) + { + ROCP_CI_LOG(WARNING) << fmt::format("Conflicting visibility of agent-{} between " + "{} and {}. Assuming {} supersedes {}", + agent_info.node_id, + env_primary, + env_secondary, + env_primary, + env_secondary); + } + return env_primary; + }; + + // if HIP_VISIBLE_DEVICES is not set, fall back on these + hip_visible_envvar = parse_hip_visible_alt(hip_visible_envvar, "CUDA_VISIBLE_DEVICES"); + hip_visible_envvar = parse_hip_visible_alt(hip_visible_envvar, "GPU_DEVICE_ORDINAL"); + + if(!hip_visible && !rocr_visible) + { + set_hsa_visibility(true); + set_hip_visibility(true); + } + else + { + ROCP_INFO << "agent-" << agent_info.node_id + << " :: logical node type id: " << agent_info.logical_node_type_id; + + if(rocr_visible) + set_hsa_visibility(*rocr_visible); + else + set_hsa_visibility(true); + + if(hip_visible) + set_hip_visibility(*hip_visible); + else + set_hip_visibility((rocr_visible) ? 
rocr_visible->value : true); + } + } + else + { + ROCP_CI_LOG(WARNING) << "Agent-" << agent_info.node_id + << " has unexpected agent type value " << agent_info.type + << " passed to " << __FUNCTION__; + } +} + using unique_agent_t = std::unique_ptr; auto @@ -436,11 +649,13 @@ read_topology() agent_info.name = ""; agent_info.product_name = ""; agent_info.vendor_name = ""; + agent_info.uuid = {.value = 0}; if(agent_info.type == ROCPROFILER_AGENT_TYPE_GPU) { constexpr auto workgrp_max = 1024; constexpr auto grid_max = std::numeric_limits::max(); + read_property(properties, "unique_id", agent_info.uuid.value); read_property( properties, "max_engine_clk_fcompute", agent_info.max_engine_clk_fcompute); read_property(properties, "local_mem_size", agent_info.local_mem_size); @@ -591,6 +806,8 @@ read_topology() } } + update_agent_runtime_visibility(agent_info); + data.emplace_back(new rocprofiler_agent_t{agent_info}, [](rocprofiler_agent_t* ptr) { if(ptr) { @@ -908,6 +1125,13 @@ get_hsa_agent(const rocprofiler_agent_t* agent) return std::nullopt; } +std::optional +get_hsa_agent(rocprofiler_agent_id_t agent_id) +{ + if(const auto* _agent = get_agent(agent_id); _agent) return get_hsa_agent(_agent); + return std::nullopt; +} + const rocprofiler_agent_t* get_rocprofiler_agent(hsa_agent_t agent) { @@ -947,6 +1171,13 @@ get_agent_available_properties() static std::unordered_set _prop; return _prop; } + +void +internal_refresh_topology() +{ + auto _updated_topology = read_topology(); + std::swap(get_agent_topology(), _updated_topology); +} } // namespace agent } // namespace rocprofiler diff --git a/source/lib/rocprofiler-sdk/agent.hpp b/source/lib/rocprofiler-sdk/agent.hpp index c0365c7559..0edf81d62e 100644 --- a/source/lib/rocprofiler-sdk/agent.hpp +++ b/source/lib/rocprofiler-sdk/agent.hpp @@ -49,6 +49,9 @@ construct_agent_cache(::HsaApiTable* table); std::optional get_hsa_agent(const rocprofiler_agent_t* agent); +std::optional +get_hsa_agent(rocprofiler_agent_id_t agent_id); + 
const rocprofiler_agent_t* get_rocprofiler_agent(hsa_agent_t agent); @@ -72,5 +75,8 @@ get_aql_agent(rocprofiler_agent_id_t id); void construct_agent_cache(::HsaApiTable* table); + +void +internal_refresh_topology(); // only for internal testing } // namespace agent } // namespace rocprofiler diff --git a/source/lib/rocprofiler-sdk/code_object/code_object.cpp b/source/lib/rocprofiler-sdk/code_object/code_object.cpp index f9a1023994..9016fc3503 100644 --- a/source/lib/rocprofiler-sdk/code_object/code_object.cpp +++ b/source/lib/rocprofiler-sdk/code_object/code_object.cpp @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -170,12 +171,12 @@ using amd_compute_pgm_rsrc_three32_t = uint32_t; struct kernel_descriptor_t { uint8_t reserved0[16]; - int64_t kernel_code_entry_byte_offset; + int64_t kernel_code_entry_byte_offset = 0; uint8_t reserved1[20]; - uint32_t compute_pgm_rsrc3; - uint32_t compute_pgm_rsrc1; - uint32_t compute_pgm_rsrc2; - uint16_t kernel_code_properties; + uint32_t compute_pgm_rsrc3 = 0; + uint32_t compute_pgm_rsrc1 = 0; + uint32_t compute_pgm_rsrc2 = 0; + uint16_t kernel_code_properties = 0; uint8_t reserved2[6]; }; @@ -485,8 +486,11 @@ executable_iterate_agent_symbols_load_callback(hsa_executable_t executabl const auto* kernel_descript = get_kernel_descriptor(data.kernel_object); if(CHECK_NOTNULL(code_obj_v) && CHECK_NOTNULL(kernel_descript)) { - const auto* rocp_agent = agent::get_agent(code_obj_v->rocp_data.rocp_agent); - if(CHECK_NOTNULL(rocp_agent)) + data.kernel_code_entry_byte_offset = kernel_descript->kernel_code_entry_byte_offset; + data.kernel_address.value = data.kernel_object + data.kernel_code_entry_byte_offset; + + if(const auto* rocp_agent = agent::get_agent(code_obj_v->rocp_data.rocp_agent); + CHECK_NOTNULL(rocp_agent)) { data.arch_vgpr_count = arch_vgpr_count(rocp_agent->name, *kernel_descript); data.accum_vgpr_count = accum_vgpr_count(rocp_agent->name, *kernel_descript); @@ -692,15 +696,17 @@ 
code_object_unload_callback(hsa_executable_t executable, CHECK_NOTNULL(code_obj_arr); - // auto _size = CHECK_NOTNULL(get_code_objects())->rlock([](const auto& data) { return - // data.size(); }); ROCP_INFO << "[inp] executable=" << executable.handle - // << ", code_object=" << loaded_code_object.handle << " vs. " << _size; + ROCP_TRACE << "[inp] executable=" << executable.handle + << ", code_object=" << loaded_code_object.handle << " vs. " + << (CHECK_NOTNULL(get_code_objects())->rlock([](const auto& data) { + return data.size(); + })); CHECK_NOTNULL(get_code_objects())->rlock([&](const code_object_array_t& arr) { for(const auto& itr : arr) { - // ROCP_INFO << "[cmp] executable=" << itr->hsa_executable.handle - // << ", code_object=" << itr->hsa_code_object.handle; + ROCP_TRACE << "[cmp] executable=" << itr->hsa_executable.handle + << ", code_object=" << itr->hsa_code_object.handle; if(itr->hsa_executable.handle == executable.handle && itr->hsa_code_object.handle == loaded_code_object.handle) // if(itr && *itr == code_obj_v) @@ -708,9 +714,12 @@ code_object_unload_callback(hsa_executable_t executable, auto& _last = code_obj_arr->emplace_back(hsa::code_object_unload{.object = itr.get()}); - auto agent = itr->rocp_data.hsa_agent; - ::rocprofiler::hsa::get_core_table()->hsa_executable_iterate_agent_symbols_fn( - executable, agent, executable_iterate_agent_symbols_unload_callback, &_last); + if(auto agent = agent::get_hsa_agent(itr->rocp_data.agent_id); agent) + ::rocprofiler::hsa::get_core_table()->hsa_executable_iterate_agent_symbols_fn( + executable, + *agent, + executable_iterate_agent_symbols_unload_callback, + &_last); } } }); diff --git a/source/lib/rocprofiler-sdk/hsa/queue_controller.cpp b/source/lib/rocprofiler-sdk/hsa/queue_controller.cpp index f357724e6b..c4ad9a2071 100644 --- a/source/lib/rocprofiler-sdk/hsa/queue_controller.cpp +++ b/source/lib/rocprofiler-sdk/hsa/queue_controller.cpp @@ -141,7 +141,8 @@ constexpr rocprofiler_agent_t default_agent = 
.node_id = 0, .logical_node_id = 0, .logical_node_type_id = 0, - .reserved_padding0 = 0}; + .runtime_visibility = {0, 0, 0, 0, 0}, + .uuid = {.value = 0}}; } // namespace void diff --git a/source/lib/rocprofiler-sdk/tests/agent.cpp b/source/lib/rocprofiler-sdk/tests/agent.cpp index b6e3d6de4f..9723267674 100644 --- a/source/lib/rocprofiler-sdk/tests/agent.cpp +++ b/source/lib/rocprofiler-sdk/tests/agent.cpp @@ -20,13 +20,16 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. +#include "lib/rocprofiler-sdk/agent.hpp" +#include "lib/common/environment.hpp" +#include "lib/rocprofiler-sdk/registration.hpp" +#include "lib/rocprofiler-sdk/tests/details/agent.hpp" + #include #include #include - -#include "lib/rocprofiler-sdk/agent.hpp" -#include "lib/rocprofiler-sdk/registration.hpp" -#include "lib/rocprofiler-sdk/tests/details/agent.hpp" +#include +#include #include #include @@ -104,10 +107,11 @@ TEST(rocprofiler_lib, agent_abi) EXPECT_EQ(offsetof(rocprofiler_agent_t, node_id), 280) << msg; EXPECT_EQ(offsetof(rocprofiler_agent_t, logical_node_id), 284) << msg; EXPECT_EQ(offsetof(rocprofiler_agent_t, logical_node_type_id), 288) << msg; - EXPECT_EQ(offsetof(rocprofiler_agent_t, reserved_padding0), 292) << msg; + EXPECT_EQ(offsetof(rocprofiler_agent_t, runtime_visibility), 292) << msg; + EXPECT_EQ(offsetof(rocprofiler_agent_t, uuid), 296) << msg; // Add test for offset of new field above this. Do NOT change any existing values! - constexpr auto expected_rocp_agent_size = 296; + constexpr auto expected_rocp_agent_size = 304; // If a new field is added, increase this value by the size of the new field(s) EXPECT_EQ(sizeof(rocprofiler_agent_t), expected_rocp_agent_size) << "ABI break. 
If you added a new field, make sure that this is the only new check that " @@ -286,3 +290,489 @@ TEST(rocprofiler_lib, agent) for(auto& itr : _rocm_info.isas) delete[] itr.name_str; } + +namespace +{ +namespace common = ::rocprofiler::common; + +auto +get_gpu_agents() +{ + namespace agent = ::rocprofiler::agent; + + auto get_env_str = [](std::string_view name) { + return fmt::format("{:>22} = {}", name, common::get_env(name, "")); + }; + + ROCP_WARNING << "get_gpu_agents() :: refreshing internal topology..." + << fmt::format("\n\t{}\n\t{}\n\t{}\n\t{}", + get_env_str("ROCR_VISIBLE_DEVICES"), + get_env_str("HIP_VISIBLE_DEVICES"), + get_env_str("GPU_DEVICE_ORDINAL"), + get_env_str("CUDA_VISIBLE_DEVICES")); + + agent::internal_refresh_topology(); + auto _agents = agent::get_agents(); + auto _gpu_agents = decltype(_agents){}; + auto _cpu_agents = decltype(_agents){}; + for(const auto* itr : _agents) + { + if(itr->type == ROCPROFILER_AGENT_TYPE_CPU) + { + EXPECT_EQ(itr->runtime_visibility.hsa, 1) + << "expect cpu agent-" << itr->node_id << " to be visible"; + EXPECT_EQ(itr->runtime_visibility.hip, 1) + << "expect cpu agent-" << itr->node_id << " to be visible"; + EXPECT_EQ(itr->runtime_visibility.rccl, 1) + << "expect cpu agent-" << itr->node_id << " to be visible"; + EXPECT_EQ(itr->runtime_visibility.rocdecode, 1) + << "expect cpu agent-" << itr->node_id << " to be visible"; + _cpu_agents.emplace_back(itr); + } + else if(itr->type == ROCPROFILER_AGENT_TYPE_GPU) + { + _gpu_agents.emplace_back(itr); + } + } + + EXPECT_EQ(_agents.size(), _cpu_agents.size() + _gpu_agents.size()) + << "cpu: " << _cpu_agents.size() << ", gpu: " << _gpu_agents.size(); + + return _gpu_agents; +} +} // namespace + +TEST(rocprofiler_lib, agent_visibility) +{ + constexpr auto noval = std::string_view{}; + + common::set_env("ROCR_VISIBLE_DEVICES", noval, 1); + common::set_env("HIP_VISIBLE_DEVICES", noval, 1); + common::set_env("GPU_DEVICE_ORDINAL", noval, 1); + 
common::set_env("CUDA_VISIBLE_DEVICES", noval, 1); + + auto num_gpu_agents = get_gpu_agents().size(); + auto strngpus = std::to_string(num_gpu_agents); + + if(num_gpu_agents < 1) + { + GTEST_SKIP() << "no gpu agents"; + } + + for(const auto* itr : get_gpu_agents()) + { + EXPECT_EQ(itr->runtime_visibility.hsa, 1) << "agent-" << itr->node_id; + EXPECT_EQ(itr->runtime_visibility.hip, 1) << "agent-" << itr->node_id; + EXPECT_EQ(itr->runtime_visibility.rccl, 1) << "agent-" << itr->node_id; + EXPECT_EQ(itr->runtime_visibility.rocdecode, 1) << "agent-" << itr->node_id; + } + + common::set_env("ROCR_VISIBLE_DEVICES", strngpus, 1); + common::set_env("HIP_VISIBLE_DEVICES", noval, 1); + common::set_env("GPU_DEVICE_ORDINAL", noval, 1); + common::set_env("CUDA_VISIBLE_DEVICES", noval, 1); + + for(const auto* itr : get_gpu_agents()) + { + EXPECT_EQ(itr->runtime_visibility.hsa, 0) << "agent-" << itr->node_id; + EXPECT_EQ(itr->runtime_visibility.hip, 0) << "agent-" << itr->node_id; + EXPECT_EQ(itr->runtime_visibility.rccl, 0) << "agent-" << itr->node_id; + EXPECT_EQ(itr->runtime_visibility.rocdecode, 0) << "agent-" << itr->node_id; + } + + common::set_env("ROCR_VISIBLE_DEVICES", noval, 1); + common::set_env("HIP_VISIBLE_DEVICES", strngpus, 1); + common::set_env("GPU_DEVICE_ORDINAL", noval, 1); + common::set_env("CUDA_VISIBLE_DEVICES", noval, 1); + + for(const auto* itr : get_gpu_agents()) + { + EXPECT_EQ(itr->runtime_visibility.hsa, 1) << "agent-" << itr->node_id; + EXPECT_EQ(itr->runtime_visibility.hip, 0) << "agent-" << itr->node_id; + EXPECT_EQ(itr->runtime_visibility.rccl, 0) << "agent-" << itr->node_id; + EXPECT_EQ(itr->runtime_visibility.rocdecode, 0) << "agent-" << itr->node_id; + } + + common::set_env("ROCR_VISIBLE_DEVICES", noval, 1); + common::set_env("HIP_VISIBLE_DEVICES", noval, 1); + common::set_env("GPU_DEVICE_ORDINAL", strngpus, 1); + common::set_env("CUDA_VISIBLE_DEVICES", noval, 1); + + for(const auto* itr : get_gpu_agents()) + { + 
EXPECT_EQ(itr->runtime_visibility.hsa, 1) << "agent-" << itr->node_id; + EXPECT_EQ(itr->runtime_visibility.hip, 0) << "agent-" << itr->node_id; + EXPECT_EQ(itr->runtime_visibility.rccl, 0) << "agent-" << itr->node_id; + EXPECT_EQ(itr->runtime_visibility.rocdecode, 0) << "agent-" << itr->node_id; + } + + common::set_env("ROCR_VISIBLE_DEVICES", noval, 1); + common::set_env("HIP_VISIBLE_DEVICES", noval, 1); + common::set_env("GPU_DEVICE_ORDINAL", noval, 1); + common::set_env("CUDA_VISIBLE_DEVICES", strngpus, 1); + + for(const auto* itr : get_gpu_agents()) + { + EXPECT_EQ(itr->runtime_visibility.hsa, 1) << "agent-" << itr->node_id; + EXPECT_EQ(itr->runtime_visibility.hip, 0) << "agent-" << itr->node_id; + EXPECT_EQ(itr->runtime_visibility.rccl, 0) << "agent-" << itr->node_id; + EXPECT_EQ(itr->runtime_visibility.rocdecode, 0) << "agent-" << itr->node_id; + } +} + +TEST(rocprofiler_lib, agent_visibility_multigpu) +{ + constexpr auto noval = std::string_view{}; + + common::set_env("ROCR_VISIBLE_DEVICES", noval, 1); + common::set_env("HIP_VISIBLE_DEVICES", noval, 1); + common::set_env("GPU_DEVICE_ORDINAL", noval, 1); + common::set_env("CUDA_VISIBLE_DEVICES", noval, 1); + + auto ordinals = std::map{}; + auto uuids = std::map{}; + auto in_half = std::map{}; + auto num_gpu_agents = size_t{0}; + auto all_ordinals = std::string{}; + auto all_uuids = std::string{}; + auto all_mixed = std::string{}; + auto half_ordinals = std::string{}; + auto half_uuids = std::string{}; + auto half_mixed = std::string{}; + { + auto _agents = get_gpu_agents(); + num_gpu_agents = _agents.size(); + size_t count = 0; + for(const auto* itr : _agents) + { + ordinals.emplace(itr->id, itr->logical_node_type_id); + uuids.emplace(itr->id, fmt::format("GPU-{:X}", itr->uuid.value)); + ROCP_WARNING << ordinals.at(itr->id) << " :: " << uuids.at(itr->id); + + all_ordinals = fmt::format("{},{}", all_ordinals, ordinals.at(itr->id)); + all_uuids = fmt::format("{},{}", all_uuids, uuids.at(itr->id)); + + if((count 
% 2) == 0) + all_mixed = fmt::format("{},{}", all_mixed, uuids.at(itr->id)); + else + all_mixed = fmt::format("{},{}", all_mixed, ordinals.at(itr->id)); + + if(count < (num_gpu_agents / 2)) + { + half_ordinals = all_ordinals.substr(1); + half_uuids = all_uuids.substr(1); + half_mixed = all_mixed.substr(1); + in_half.emplace(itr->id, 1); + } + else + { + in_half.emplace(itr->id, 0); + } + ++count; + } + } + + ASSERT_EQ(in_half.size(), num_gpu_agents); + + auto strngpus = std::to_string(num_gpu_agents); + all_ordinals = all_ordinals.substr(1); + all_uuids = all_uuids.substr(1); + all_mixed = all_mixed.substr(1); + + if(num_gpu_agents < 2) + { + GTEST_SKIP() << "requires multiple gpu agents"; + } + + common::set_env("ROCR_VISIBLE_DEVICES", all_ordinals, 1); + common::set_env("HIP_VISIBLE_DEVICES", noval, 1); + common::set_env("GPU_DEVICE_ORDINAL", noval, 1); + common::set_env("CUDA_VISIBLE_DEVICES", noval, 1); + + for(const auto* itr : get_gpu_agents()) + { + ASSERT_EQ(itr->runtime_visibility.hsa, 1) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.hip, 1) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.rccl, 1) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.rocdecode, 1) << "agent-" << itr->node_id; + } + + common::set_env("ROCR_VISIBLE_DEVICES", all_uuids, 1); + common::set_env("HIP_VISIBLE_DEVICES", noval, 1); + common::set_env("GPU_DEVICE_ORDINAL", noval, 1); + common::set_env("CUDA_VISIBLE_DEVICES", noval, 1); + + for(const auto* itr : get_gpu_agents()) + { + ASSERT_EQ(itr->runtime_visibility.hsa, 1) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.hip, 1) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.rccl, 1) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.rocdecode, 1) << "agent-" << itr->node_id; + } + + common::set_env("ROCR_VISIBLE_DEVICES", noval, 1); + common::set_env("HIP_VISIBLE_DEVICES", half_ordinals, 1); + common::set_env("GPU_DEVICE_ORDINAL", noval, 
1); + common::set_env("CUDA_VISIBLE_DEVICES", noval, 1); + + for(const auto* itr : get_gpu_agents()) + { + ASSERT_EQ(itr->runtime_visibility.hsa, 1) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.hip, in_half.at(itr->id)) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.rccl, in_half.at(itr->id)) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.rocdecode, in_half.at(itr->id)) + << "agent-" << itr->node_id; + } + + common::set_env("ROCR_VISIBLE_DEVICES", noval, 1); + common::set_env("HIP_VISIBLE_DEVICES", noval, 1); + common::set_env("GPU_DEVICE_ORDINAL", half_ordinals, 1); + common::set_env("CUDA_VISIBLE_DEVICES", noval, 1); + + for(const auto* itr : get_gpu_agents()) + { + ASSERT_EQ(itr->runtime_visibility.hsa, 1) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.hip, in_half.at(itr->id)) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.rccl, in_half.at(itr->id)) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.rocdecode, in_half.at(itr->id)) + << "agent-" << itr->node_id; + } + + common::set_env("ROCR_VISIBLE_DEVICES", noval, 1); + common::set_env("HIP_VISIBLE_DEVICES", noval, 1); + common::set_env("GPU_DEVICE_ORDINAL", noval, 1); + common::set_env("CUDA_VISIBLE_DEVICES", half_ordinals, 1); + + for(const auto* itr : get_gpu_agents()) + { + ASSERT_EQ(itr->runtime_visibility.hsa, 1) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.hip, in_half.at(itr->id)) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.rccl, in_half.at(itr->id)) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.rocdecode, in_half.at(itr->id)) + << "agent-" << itr->node_id; + } + + common::set_env("ROCR_VISIBLE_DEVICES", all_ordinals, 1); + common::set_env("HIP_VISIBLE_DEVICES", half_uuids, 1); + common::set_env("GPU_DEVICE_ORDINAL", noval, 1); + common::set_env("CUDA_VISIBLE_DEVICES", noval, 1); + + for(const auto* itr : get_gpu_agents()) + { + 
ASSERT_EQ(itr->runtime_visibility.hsa, 1) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.hip, in_half.at(itr->id)) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.rccl, in_half.at(itr->id)) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.rocdecode, in_half.at(itr->id)) + << "agent-" << itr->node_id; + } + + common::set_env("ROCR_VISIBLE_DEVICES", half_uuids, 1); + common::set_env("HIP_VISIBLE_DEVICES", all_ordinals, 1); + common::set_env("GPU_DEVICE_ORDINAL", noval, 1); + common::set_env("CUDA_VISIBLE_DEVICES", noval, 1); + + for(const auto* itr : get_gpu_agents()) + { + ASSERT_EQ(itr->runtime_visibility.hsa, in_half.at(itr->id)) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.hip, in_half.at(itr->id)) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.rccl, in_half.at(itr->id)) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.rocdecode, in_half.at(itr->id)) + << "agent-" << itr->node_id; + } + + common::set_env("ROCR_VISIBLE_DEVICES", half_uuids, 1); + common::set_env("HIP_VISIBLE_DEVICES", noval, 1); + common::set_env("GPU_DEVICE_ORDINAL", all_ordinals, 1); + common::set_env("CUDA_VISIBLE_DEVICES", noval, 1); + + for(const auto* itr : get_gpu_agents()) + { + ASSERT_EQ(itr->runtime_visibility.hsa, in_half.at(itr->id)) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.hip, in_half.at(itr->id)) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.rccl, in_half.at(itr->id)) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.rocdecode, in_half.at(itr->id)) + << "agent-" << itr->node_id; + } + + common::set_env("ROCR_VISIBLE_DEVICES", half_uuids, 1); + common::set_env("HIP_VISIBLE_DEVICES", noval, 1); + common::set_env("GPU_DEVICE_ORDINAL", noval, 1); + common::set_env("CUDA_VISIBLE_DEVICES", all_ordinals, 1); + + for(const auto* itr : get_gpu_agents()) + { + ASSERT_EQ(itr->runtime_visibility.hsa, in_half.at(itr->id)) << "agent-" 
<< itr->node_id; + ASSERT_EQ(itr->runtime_visibility.hip, in_half.at(itr->id)) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.rccl, in_half.at(itr->id)) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.rocdecode, in_half.at(itr->id)) + << "agent-" << itr->node_id; + } +} + +TEST(rocprofiler_lib, agent_visibility_inverted_multigpu) +{ + constexpr auto noval = std::string_view{}; + + common::set_env("ROCR_VISIBLE_DEVICES", noval, 1); + common::set_env("HIP_VISIBLE_DEVICES", noval, 1); + common::set_env("GPU_DEVICE_ORDINAL", noval, 1); + common::set_env("CUDA_VISIBLE_DEVICES", noval, 1); + + auto ordinals = std::map{}; + auto num_gpu_agents = size_t{0}; + auto reversed_uuid = std::string{}; + { + auto _agents = get_gpu_agents(); + num_gpu_agents = _agents.size(); + + if(num_gpu_agents < 2) + { + GTEST_SKIP() << "requires 2 or more gpu agents"; + } + + for(const auto* itr : _agents) + { + auto _uuid = fmt::format("GPU-{:X}", itr->uuid.value); + if(ordinals.empty()) reversed_uuid = fmt::format("1,{}", _uuid); + + ordinals.emplace(itr->id, itr->logical_node_type_id); + } + + // make sure there are 0 and 1 ordinal entries for later checks + size_t count = 0; + for(const auto* itr : _agents) + { + if(ordinals.at(itr->id) == 0) count += 1; + if(ordinals.at(itr->id) == 1) count += 1; + } + ASSERT_EQ(count, 2) << "Did not have ordinals 0 and 1"; + } + + // flip the first two devices + common::set_env("ROCR_VISIBLE_DEVICES", "1,0", 1); + common::set_env("HIP_VISIBLE_DEVICES", "0", 1); + common::set_env("GPU_DEVICE_ORDINAL", noval, 1); + common::set_env("CUDA_VISIBLE_DEVICES", noval, 1); + + for(const auto* itr : get_gpu_agents()) + { + if(ordinals.at(itr->id) == 0) + { + ASSERT_EQ(itr->runtime_visibility.hsa, 1) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.hip, 0) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.rccl, 0) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.rocdecode, 0) << 
"agent-" << itr->node_id; + } + else if(ordinals.at(itr->id) == 1) + { + ASSERT_EQ(itr->runtime_visibility.hsa, 1) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.hip, 1) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.rccl, 1) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.rocdecode, 1) << "agent-" << itr->node_id; + } + else + { + ASSERT_GT(ordinals.at(itr->id), 1); + ASSERT_EQ(itr->runtime_visibility.hsa, 0) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.hip, 0) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.rccl, 0) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.rocdecode, 0) << "agent-" << itr->node_id; + } + } + + // flip the first two devices + common::set_env("ROCR_VISIBLE_DEVICES", "1,0", 1); + common::set_env("HIP_VISIBLE_DEVICES", noval, 1); + common::set_env("GPU_DEVICE_ORDINAL", "0", 1); + common::set_env("CUDA_VISIBLE_DEVICES", noval, 1); + + for(const auto* itr : get_gpu_agents()) + { + if(ordinals.at(itr->id) == 0) + { + ASSERT_EQ(itr->runtime_visibility.hsa, 1) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.hip, 0) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.rccl, 0) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.rocdecode, 0) << "agent-" << itr->node_id; + } + else if(ordinals.at(itr->id) == 1) + { + ASSERT_EQ(itr->runtime_visibility.hsa, 1) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.hip, 1) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.rccl, 1) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.rocdecode, 1) << "agent-" << itr->node_id; + } + else + { + ASSERT_GT(ordinals.at(itr->id), 1); + ASSERT_EQ(itr->runtime_visibility.hsa, 0) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.hip, 0) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.rccl, 0) << "agent-" << itr->node_id; + 
ASSERT_EQ(itr->runtime_visibility.rocdecode, 0) << "agent-" << itr->node_id; + } + } + + // flip the first two devices + common::set_env("ROCR_VISIBLE_DEVICES", "1,0", 1); + common::set_env("HIP_VISIBLE_DEVICES", noval, 1); + common::set_env("GPU_DEVICE_ORDINAL", noval, 1); + common::set_env("CUDA_VISIBLE_DEVICES", "0", 1); + + for(const auto* itr : get_gpu_agents()) + { + if(ordinals.at(itr->id) == 0) + { + ASSERT_EQ(itr->runtime_visibility.hsa, 1) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.hip, 0) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.rccl, 0) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.rocdecode, 0) << "agent-" << itr->node_id; + } + else if(ordinals.at(itr->id) == 1) + { + ASSERT_EQ(itr->runtime_visibility.hsa, 1) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.hip, 1) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.rccl, 1) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.rocdecode, 1) << "agent-" << itr->node_id; + } + else + { + ASSERT_GT(ordinals.at(itr->id), 1); + ASSERT_EQ(itr->runtime_visibility.hsa, 0) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.hip, 0) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.rccl, 0) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.rocdecode, 0) << "agent-" << itr->node_id; + } + } + + // flip the first two devices + common::set_env("ROCR_VISIBLE_DEVICES", reversed_uuid, 1); + common::set_env("HIP_VISIBLE_DEVICES", "0", 1); + common::set_env("GPU_DEVICE_ORDINAL", noval, 1); + common::set_env("CUDA_VISIBLE_DEVICES", noval, 1); + + for(const auto* itr : get_gpu_agents()) + { + if(ordinals.at(itr->id) == 0) + { + ASSERT_EQ(itr->runtime_visibility.hsa, 1) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.hip, 0) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.rccl, 0) << "agent-" << itr->node_id; + 
ASSERT_EQ(itr->runtime_visibility.rocdecode, 0) << "agent-" << itr->node_id; + } + else if(ordinals.at(itr->id) == 1) + { + ASSERT_EQ(itr->runtime_visibility.hsa, 1) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.hip, 1) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.rccl, 1) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.rocdecode, 1) << "agent-" << itr->node_id; + } + else + { + ASSERT_GT(ordinals.at(itr->id), 1); + ASSERT_EQ(itr->runtime_visibility.hsa, 0) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.hip, 0) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.rccl, 0) << "agent-" << itr->node_id; + ASSERT_EQ(itr->runtime_visibility.rocdecode, 0) << "agent-" << itr->node_id; + } + } +}