/****************************************************************************** * Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. *****************************************************************************/ #include "util.hpp" #include #include #include #include "config.h" // NOLINT(build/include_subdir) namespace rocshmem { __constant__ int* print_lock; typedef struct device_agent { hsa_agent_t agent; hsa_amd_memory_pool_t pool; } device_agent_t; std::vector gpu_agents; std::vector cpu_agents; __device__ uint64_t __read_clock() { uint64_t clock{}; asm volatile( "s_memrealtime %0\n" "s_waitcnt lgkmcnt(0)\n" : "=s"(clock)); return clock; } hsa_status_t rocm_hsa_amd_memory_pool_callback( hsa_amd_memory_pool_t memory_pool, void* data) { hsa_amd_memory_pool_global_flag_t pool_flag{}; hsa_status_t status{hsa_amd_memory_pool_get_info( memory_pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &pool_flag)}; if (status != HSA_STATUS_SUCCESS) { printf("Failure to get pool info: 0x%x", status); return status; } if (pool_flag == (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT | HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED)) { *static_cast(data) = memory_pool; } return HSA_STATUS_SUCCESS; } hsa_status_t rocm_hsa_agent_callback(hsa_agent_t agent, [[maybe_unused]] void* data) { hsa_device_type_t device_type{}; hsa_status_t status{ hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type)}; if (status != HSA_STATUS_SUCCESS) { printf("Failure to get device type: 0x%x", status); return status; } if (device_type == HSA_DEVICE_TYPE_GPU) { gpu_agents.emplace_back(); gpu_agents.back().agent = agent; status = hsa_amd_agent_iterate_memory_pools( agent, rocm_hsa_amd_memory_pool_callback, &(gpu_agents.back().pool)); } if (device_type == HSA_DEVICE_TYPE_CPU) { cpu_agents.emplace_back(); cpu_agents.back().agent = agent; status = hsa_amd_agent_iterate_memory_pools( agent, rocm_hsa_amd_memory_pool_callback, &(cpu_agents.back().pool)); } return status; } int rocm_init() { hsa_status_t status{hsa_init()}; if (status != HSA_STATUS_SUCCESS) { printf("Failure to open HSA connection: 0x%x", status); return 1; } status = hsa_iterate_agents(rocm_hsa_agent_callback, nullptr); if (status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK) { printf("Failure to iterate HSA agents: 0x%x", status); return 1; } return 0; } void rocm_memory_lock_to_fine_grain(void* ptr, size_t size, void** gpu_ptr, int gpu_id) { hsa_status_t status{ hsa_amd_memory_lock_to_pool(ptr, size, &(gpu_agents[gpu_id].agent), 1, cpu_agents[0].pool, 0, gpu_ptr)}; if (status != HSA_STATUS_SUCCESS) { printf("Failed to lock memory pool (%p): 0x%x\n", ptr, status); exit(-1); } } // TODO(kpunniya): use runtime value instead of hard-coded value uint64_t wallClk_freq_mhz() { hipDeviceProp_t deviceProp{}; CHECK_HIP(hipGetDeviceProperties(&deviceProp, 0)); // In rocm 6.x and gcnArchName is used instead of gcnArch #if HIP_VERSION_MAJOR >= 6 const auto gcnArch = std::atoi(deviceProp.gcnArchName); #else const auto gcnArch = deviceProp.gcnArch; #endif switch (gcnArch) { case 900: // MI25 return 27; case 906: return 25; // MI50,MI60 case 908: return 25; // MI100 case 910: return 25; // MI200 default: assert(false && "clock data unavailable"); return 0; } return 0; } } // namespace rocshmem