From f9d0d820694be2c429ae57b3349dbc76f1da7797 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Fri, 13 Nov 2020 12:31:10 -0600 Subject: [PATCH] adding GPU command functions module Change-Id: Id2c2d82ea6fee42695309ad3bb296effa77a2f33 [ROCm/rocprofiler commit: fb82ddee818b0e93636d3f2a3c42ed4ceee0a785] --- projects/rocprofiler/src/CMakeLists.txt | 1 + projects/rocprofiler/src/core/gpu_command.cpp | 127 ++++++++++++++++++ projects/rocprofiler/src/core/gpu_command.h | 62 +++++++++ projects/rocprofiler/src/core/rocprofiler.cpp | 83 +----------- 4 files changed, 193 insertions(+), 80 deletions(-) create mode 100644 projects/rocprofiler/src/core/gpu_command.cpp create mode 100644 projects/rocprofiler/src/core/gpu_command.h diff --git a/projects/rocprofiler/src/CMakeLists.txt b/projects/rocprofiler/src/CMakeLists.txt index dbe00cd990..59168d8fab 100644 --- a/projects/rocprofiler/src/CMakeLists.txt +++ b/projects/rocprofiler/src/CMakeLists.txt @@ -26,6 +26,7 @@ set ( TARGET_LIB "${TARGET_NAME}" ) set ( LIB_SRC ${LIB_DIR}/core/rocprofiler.cpp + ${LIB_DIR}/core/gpu_command.cpp ${LIB_DIR}/core/proxy_queue.cpp ${LIB_DIR}/core/simple_proxy_queue.cpp ${LIB_DIR}/core/intercept_queue.cpp diff --git a/projects/rocprofiler/src/core/gpu_command.cpp b/projects/rocprofiler/src/core/gpu_command.cpp new file mode 100644 index 0000000000..48e4fba72a --- /dev/null +++ b/projects/rocprofiler/src/core/gpu_command.cpp @@ -0,0 +1,127 @@ +/****************************************************************************** +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. 
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*******************************************************************************/
+
+#include "gpu_command.h"
+
+// NOTE(review): the targets of the system includes were lost in this copy of
+// the patch; the list below is what the code in this file uses. <hsa.h> is the
+// presumed HSA runtime header name - confirm against the build include paths.
+#include <hsa.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <map>
+#include <mutex>
+
+#include "core/profile.h"
+#include "util/exception.h"
+#include "util/hsa_rsrc_factory.h"
+
+namespace rocprofiler {
+// Builds the GPU command packet(s) implementing 'op' for the given agent.
+//
+// Asks AQLprofile for the command buffer size, allocates that buffer with
+// HsaRsrcFactory::AllocateSysMemory() and writes the resulting packet(s) to
+// 'command'. Agents whose name starts with "gfx8" are treated as legacy: the
+// packet is converted with hsa_ven_amd_aqlprofile_legacy_get_pm4() and
+// Profile::LEGACY_SLOT_SIZE_PKT packets are produced; otherwise one packet.
+//
+//   op         - command selector, must be less than NUMBER_GPU_CMD_OP
+//   agent_info - descriptor of the target agent, must not be NULL
+//   command    - output array with room for 'slot_count' packets
+//   slot_count - capacity of 'command', in packets
+//
+// Returns the size of the generated command in bytes.
+// Every failure is reported through EXC_RAISING/AQL_EXC_RAISING.
+// The command buffer allocated here is not released by this function.
+size_t CreateGpuCommand(gpu_cmd_op_t op,
+                        const rocprofiler::util::AgentInfo* agent_info,
+                        packet_t* command,
+                        const size_t& slot_count) {
+  if (op >= NUMBER_GPU_CMD_OP) EXC_RAISING(HSA_STATUS_ERROR, "bad op value (" << op << ")");
+  if (agent_info == NULL) EXC_RAISING(HSA_STATUS_ERROR, "agent_info is NULL");
+  if (command == NULL) EXC_RAISING(HSA_STATUS_ERROR, "command is NULL");
+
+  const bool is_legacy = (strncmp(agent_info->name, "gfx8", 4) == 0);
+  const size_t packet_count = (is_legacy) ? Profile::LEGACY_SLOT_SIZE_PKT : 1;
+
+  rocprofiler::util::HsaRsrcFactory* hsa_rsrc = &rocprofiler::util::HsaRsrcFactory::Instance();
+
+  if (packet_count > slot_count) EXC_RAISING(HSA_STATUS_ERROR, "packet_count > slot_count");
+
+  // AQLprofile object
+  hsa_ven_amd_aqlprofile_profile_t profile{};
+  profile.agent = agent_info->dev_id;
+
+  // Query for cmd buffer size.
+  // The info type is derived by offsetting ENABLE_CMD with 'op'.
+  // NOTE(review): assumes the AQLprofile info types for the ops follow
+  // ENABLE_CMD consecutively in gpu_cmd_op_t order - confirm against
+  // hsa_ven_amd_aqlprofile.h.
+  const hsa_ven_amd_aqlprofile_info_type_t info_type =
+      static_cast<hsa_ven_amd_aqlprofile_info_type_t>(
+          static_cast<int>(HSA_VEN_AMD_AQLPROFILE_INFO_ENABLE_CMD) + static_cast<int>(op));
+  hsa_status_t status =
+      hsa_rsrc->AqlProfileApi()->hsa_ven_amd_aqlprofile_get_info(&profile, info_type, NULL);
+  if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "get_info(ENABLE_CMD ).size exc, op(" << int(op) << ")");
+  // 'status' is HSA_STATUS_SUCCESS from here on, so the sanity checks below
+  // must raise an explicit error code instead of forwarding 'status'.
+  if (profile.command_buffer.size == 0) EXC_RAISING(HSA_STATUS_ERROR, "get_info(ENABLE_CMD).size == 0");
+
+  // Allocate cmd buffer
+  const uintptr_t alignment_mask = 0x100 - 1;
+  profile.command_buffer.ptr =
+      hsa_rsrc->AllocateSysMemory(agent_info, profile.command_buffer.size);
+  // A NULL pointer would pass the alignment test below, check it separately.
+  if (profile.command_buffer.ptr == NULL) {
+    EXC_RAISING(HSA_STATUS_ERROR, "profile.command_buffer.ptr allocation failed");
+  }
+  if ((reinterpret_cast<uintptr_t>(profile.command_buffer.ptr) & alignment_mask) != 0) {
+    EXC_RAISING(HSA_STATUS_ERROR, "profile.command_buffer.ptr bad alignment");
+  }
+
+  // Generating cmd packet
+  if (is_legacy) {
+    packet_t packet{};
+
+    // Query for cmd buffer data
+    status = hsa_rsrc->AqlProfileApi()->hsa_ven_amd_aqlprofile_get_info(&profile, info_type, &packet);
+    if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "get_info(ENABLE_CMD).data exc");
+
+    // Convert the packet to the legacy GFXIP form
+    status = hsa_rsrc->AqlProfileApi()->hsa_ven_amd_aqlprofile_legacy_get_pm4(&packet, command);
+    if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "hsa_ven_amd_aqlprofile_legacy_get_pm4");
+  } else {
+    // Query for cmd buffer data
+    status = hsa_rsrc->AqlProfileApi()->hsa_ven_amd_aqlprofile_get_info(&profile, info_type, command);
+    if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "get_info(ENABLE_CMD).data exc");
+  }
+
+  // Return cmd packet data size
+  return (packet_count * sizeof(packet_t));
+}
+
+// Cached command: packet storage plus the command size in bytes.
+struct gpu_cmd_entry_t {
+  packet_t command[Profile::LEGACY_SLOT_SIZE_PKT];
+  uint32_t size;
+};
+// Cache key: one command per (operation, chip id) pair.
+struct gpu_cmd_key_t {
+  gpu_cmd_op_t op;
+  uint32_t chip_id;
+};
+// Orders the keys by 'op' first and by 'chip_id' second.
+struct gpu_cmd_fncomp_t {
+  bool operator() (const gpu_cmd_key_t& a, const gpu_cmd_key_t& b) const {
+    return (a.op < b.op) || ((a.op == b.op) && (a.chip_id < b.chip_id));
+  }
+};
+typedef std::map<gpu_cmd_key_t, gpu_cmd_entry_t, gpu_cmd_fncomp_t> gpu_cmd_map_t;
+
+// Guards the command map created inside GetGpuCommand().
+typedef std::mutex gpu_cmd_mutex_t;
+gpu_cmd_mutex_t gpu_cmd_mutex;
+
+// Returns the command for 'op' on the given agent, creating it on first use.
+//
+// Commands are cached in a map keyed by (op, chip id of the agent); the map
+// is allocated on the first call and is never deleted. Lookup and creation
+// run under gpu_cmd_mutex.
+//
+//   op          - command selector
+//   agent_info  - descriptor of the target agent, must not be NULL
+//   command_out - receives a pointer to the packet storage of the cached
+//                 entry, must not be NULL
+//
+// Returns the command size in bytes.
+// Failures are reported through EXC_RAISING; an entry whose creation failed
+// is removed again, so the next call retries instead of returning an empty
+// command.
+size_t GetGpuCommand(gpu_cmd_op_t op,
+                     const rocprofiler::util::AgentInfo* agent_info,
+                     packet_t** command_out) {
+  static gpu_cmd_map_t* map = NULL;
+
+  if (agent_info == NULL) EXC_RAISING(HSA_STATUS_ERROR, "agent_info is NULL");
+  if (command_out == NULL) EXC_RAISING(HSA_STATUS_ERROR, "command_out is NULL");
+
+  // Getting chip-id
+  uint32_t chip_id = 0;
+  const hsa_agent_info_t attribute = static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_CHIP_ID);
+  const hsa_status_t status = hsa_agent_get_info(agent_info->dev_id, attribute, &chip_id);
+  if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_agent_get_info failed");
+
+  // Query/create a command
+  std::lock_guard<gpu_cmd_mutex_t> lck(gpu_cmd_mutex);
+  if (map == NULL) map = new gpu_cmd_map_t;
+  auto ret = map->insert({gpu_cmd_key_t{op, chip_id}, gpu_cmd_entry_t{}});
+  gpu_cmd_map_t::iterator it = ret.first;
+  if (ret.second) {
+    // The entry is inserted before the command exists; drop it again if the
+    // creation fails so that a zero-sized command is never handed out later.
+    try {
+      it->second.size = static_cast<uint32_t>(
+          CreateGpuCommand(op, agent_info, it->second.command, Profile::LEGACY_SLOT_SIZE_PKT));
+    } catch (...) {
+      map->erase(it);
+      throw;
+    }
+  }
+
+  // std::map insertions do not invalidate references to existing elements,
+  // so the pointer stays usable after the lock is released.
+  *command_out = it->second.command;
+  return it->second.size;
+}
+
+} // namespace rocprofiler
diff --git a/projects/rocprofiler/src/core/gpu_command.h b/projects/rocprofiler/src/core/gpu_command.h
new file mode 100644
index 0000000000..db2ddb8c19
--- /dev/null
+++ b/projects/rocprofiler/src/core/gpu_command.h
@@ -0,0 +1,62 @@
+/******************************************************************************
+Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*******************************************************************************/
+
+#ifndef SRC_CORE_GPU_COMMAND_H_
+#define SRC_CORE_GPU_COMMAND_H_
+
+// NOTE(review): the target of the system include was lost in this copy of the
+// patch. <hsa.h> is presumed because the declarations below use hsa_agent_t
+// and hsa_queue_t - confirm against the build include paths.
+#include <hsa.h>
+#include <stddef.h>
+
+#include "core/types.h"
+#include "util/hsa_rsrc_factory.h"
+
+namespace rocprofiler {
+// GPU command selectors. NUMBER_GPU_CMD_OP is the number of valid selectors
+// and is not a command itself.
+enum gpu_cmd_op_t {
+  PMC_ENABLE_GPU_CMD_OP = 0,
+  PMC_DISABLE_GPU_CMD_OP = 1,
+  WAIT_IDLE_GPU_CMD_OP = 2,
+  NUMBER_GPU_CMD_OP
+};
+
+// Provides the command for 'op' on the agent described by 'agent_info'.
+// A pointer to the command packets is stored to '*command_out' and the
+// command size is returned.
+size_t GetGpuCommand(gpu_cmd_op_t op,
+                     const rocprofiler::util::AgentInfo* agent_info,
+                     packet_t** command_out);
+
+// Obtains the command for 'op' with GetGpuCommand() and submits it to 'queue'
+// through HsaRsrcFactory::Submit().
+// Always returns HSA_STATUS_SUCCESS converted to size_t; the value is a
+// status code, not a size.
+static inline size_t IssueGpuCommand(gpu_cmd_op_t op,
+                                     const rocprofiler::util::AgentInfo* agent_info,
+                                     hsa_queue_t* queue) {
+  packet_t* command;
+  const size_t size = GetGpuCommand(op, agent_info, &command);
+  rocprofiler::util::HsaRsrcFactory::Instance().Submit(queue, command, size);
+  return HSA_STATUS_SUCCESS;
+}
+
+// Same as above for an HSA agent handle: the agent descriptor is looked up
+// with HsaRsrcFactory::GetAgentInfo() first.
+static inline size_t IssueGpuCommand(gpu_cmd_op_t op,
+                                     hsa_agent_t agent,
+                                     hsa_queue_t* queue) {
+  rocprofiler::util::HsaRsrcFactory* hsa_rsrc = &rocprofiler::util::HsaRsrcFactory::Instance();
+  const rocprofiler::util::AgentInfo* agent_info = hsa_rsrc->GetAgentInfo(agent);
+  return IssueGpuCommand(op, agent_info, queue);
+}
+
+} // namespace rocprofiler
+
+#endif // SRC_CORE_GPU_COMMAND_H_
diff --git a/projects/rocprofiler/src/core/rocprofiler.cpp b/projects/rocprofiler/src/core/rocprofiler.cpp
index 6c0b06ff50..e79b93940e 100644
--- a/projects/rocprofiler/src/core/rocprofiler.cpp
+++ b/projects/rocprofiler/src/core/rocprofiler.cpp
@@ -30,6 +30,7 @@ THE SOFTWARE.
#include "core/context.h" #include "core/context_pool.h" +#include "core/gpu_command.h" #include "core/hsa_queue.h" #include "core/hsa_interceptor.h" #include "core/intercept_queue.h" @@ -308,53 +309,6 @@ hsa_status_t GetExcStatus(const std::exception& e) { : HSA_STATUS_ERROR; } -inline size_t CreateEnableCmd(const rocprofiler::util::AgentInfo* agent_info, packet_t* command, const size_t& slot_count) { - const bool is_legacy = (strncmp(agent_info->name, "gfx8", 4) == 0); - const size_t packet_count = (is_legacy) ? Profile::LEGACY_SLOT_SIZE_PKT : 1; - - rocprofiler::util::HsaRsrcFactory* hsa_rsrc = &rocprofiler::util::HsaRsrcFactory::Instance(); - - if (packet_count > slot_count) EXC_RAISING(HSA_STATUS_ERROR, "packet_count > slot_count"); - - // AQLprofile object - hsa_ven_amd_aqlprofile_profile_t profile{}; - profile.agent = agent_info->dev_id; - // Query for cmd buffer size - hsa_status_t status = hsa_rsrc->AqlProfileApi()->hsa_ven_amd_aqlprofile_get_info( - &profile, HSA_VEN_AMD_AQLPROFILE_INFO_ENABLE_CMD, NULL); - if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "get_info(ENABLE_CMD).size exc"); - if (profile.command_buffer.size == 0) EXC_RAISING(status, "get_info(ENABLE_CMD).size == 0"); - // Allocate cmd buffer - const size_t aligment_mask = 0x100 - 1; - profile.command_buffer.ptr = - hsa_rsrc->AllocateSysMemory(agent_info, profile.command_buffer.size); - if ((reinterpret_cast(profile.command_buffer.ptr) & aligment_mask) != 0) { - EXC_RAISING(status, "profile.command_buffer.ptr bad alignment"); - } - - // Generating cmd packet - if (is_legacy) { - packet_t packet{}; - - // Query for cmd buffer data - status = hsa_rsrc->AqlProfileApi()->hsa_ven_amd_aqlprofile_get_info( - &profile, HSA_VEN_AMD_AQLPROFILE_INFO_ENABLE_CMD, &packet); - if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "get_info(ENABLE_CMD).data exc"); - - // Check for legacy GFXIP - status = hsa_rsrc->AqlProfileApi()->hsa_ven_amd_aqlprofile_legacy_get_pm4(&packet, command); - if (status != 
HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "hsa_ven_amd_aqlprofile_legacy_get_pm4"); - } else { - // Query for cmd buffer data - status = hsa_rsrc->AqlProfileApi()->hsa_ven_amd_aqlprofile_get_info( - &profile, HSA_VEN_AMD_AQLPROFILE_INFO_ENABLE_CMD, command); - if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "get_info(ENABLE_CMD).data exc"); - } - - // Return cmd packet data size - return (packet_count * sizeof(packet_t)); -} - hsa_status_t CreateQueuePro( hsa_agent_t agent, uint32_t size, @@ -365,14 +319,6 @@ hsa_status_t CreateQueuePro( uint32_t group_segment_size, hsa_queue_t **queue) { - typedef std::pair cmd_entry_t; - typedef std::vector cmd_vec_t; - static cmd_vec_t cmd_vec; - static uint32_t cmd_mask = 0; - static std::mutex cmd_mutex; - - rocprofiler::util::HsaRsrcFactory* hsa_rsrc = &rocprofiler::util::HsaRsrcFactory::Instance(); - // Create HSA queue hsa_status_t status = hsa_queue_create_fn( agent, @@ -385,31 +331,8 @@ hsa_status_t CreateQueuePro( queue); if (status != HSA_STATUS_SUCCESS) return status; - // Create 'Enable' cmd packet - const rocprofiler::util::AgentInfo* agent_info = hsa_rsrc->GetAgentInfo(agent); - const uint32_t dev_index = 1 << agent_info->dev_index; - const uint32_t dev_mask = 1 << dev_index; - if ((cmd_mask & dev_mask) == 0) { - std::lock_guard lck(cmd_mutex); - - if ((cmd_mask & dev_mask) == 0) { - cmd_mask |= dev_mask; - // Allocating cmd vector - uint32_t mask = 1; - while (1) { - const uint32_t max = 1 << cmd_vec.size(); - if (mask >= max) cmd_vec.push_back({}); - if (((mask & dev_mask) != 0) || (mask == 0)) break; - mask <<= 1; - } - if (mask == 0) EXC_RAISING(status, "bad device index (" << dev_index << ")"); - // Creating cmd packets - cmd_vec[dev_index].second = CreateEnableCmd(agent_info, cmd_vec[dev_index].first, Profile::LEGACY_SLOT_SIZE_PKT); - } - } - - // Enable counters for the queue - rocprofiler::util::HsaRsrcFactory::Instance().Submit(*queue, cmd_vec[dev_index].first, cmd_vec[dev_index].second); + // Issue 
PMC-enable GPU command + IssueGpuCommand(PMC_ENABLE_GPU_CMD_OP, agent, *queue); return HSA_STATUS_SUCCESS; }