From ef47516a88cd2b833b973799c46bb3ff9f2792bb Mon Sep 17 00:00:00 2001 From: Laurent Morichetti Date: Thu, 5 May 2022 08:10:51 -0700 Subject: [PATCH] Remove now unused hsa_rsrc_factory Change-Id: I66175eb9fae2e7e61400af77a0c89be9c39e770e [ROCm/roctracer commit: 4ced94b9a2c3d7fd0379ef7297dddce0b558bfc9] --- projects/roctracer/src/CMakeLists.txt | 1 - projects/roctracer/src/core/roctracer.cpp | 10 + projects/roctracer/src/core/tracker.h | 10 +- .../roctracer/src/util/hsa_rsrc_factory.cpp | 807 ------------------ .../roctracer/src/util/hsa_rsrc_factory.h | 520 ----------- 5 files changed, 11 insertions(+), 1337 deletions(-) delete mode 100644 projects/roctracer/src/util/hsa_rsrc_factory.cpp delete mode 100644 projects/roctracer/src/util/hsa_rsrc_factory.h diff --git a/projects/roctracer/src/CMakeLists.txt b/projects/roctracer/src/CMakeLists.txt index 6f596085f5..7dee1380bf 100644 --- a/projects/roctracer/src/CMakeLists.txt +++ b/projects/roctracer/src/CMakeLists.txt @@ -40,7 +40,6 @@ execute_process ( COMMAND sh -xc "ln -s ${ROOT_DIR}/../rocprofiler/src/core/acti set ( TARGET_LIB ${TARGET_NAME} ) set ( LIB_SRC ${LIB_DIR}/core/roctracer.cpp - ${LIB_DIR}/util/hsa_rsrc_factory.cpp ) add_library ( ${TARGET_LIB} ${LIBRARY_TYPE} ${LIB_SRC} ) target_include_directories ( ${TARGET_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${ROCM_INC_PATH} ${HIP_INC_DIR} ${HSA_KMT_INC_PATH} ${GEN_INC_DIR} ) diff --git a/projects/roctracer/src/core/roctracer.cpp b/projects/roctracer/src/core/roctracer.cpp index fe4b5ee641..93580e92e6 100644 --- a/projects/roctracer/src/core/roctracer.cpp +++ b/projects/roctracer/src/core/roctracer.cpp @@ -48,6 +48,16 @@ #define CONSTRUCTOR_API __attribute__((constructor)) #define DESTRUCTOR_API __attribute__((destructor)) +#define CHECK_STATUS(msg, status) \ + do { \ + if ((status) != HSA_STATUS_SUCCESS) { \ + const char* status_string = nullptr; \ + hsa_status_string(status, &status_string); \ + ERR_LOGGING(msg << ": " << (status_string ? status_string : "")); \ + abort(); \ + } \ + } while (false) + #define HIPAPI_CALL(call) \ do { \ hipError_t err = call; \ diff --git a/projects/roctracer/src/core/tracker.h b/projects/roctracer/src/core/tracker.h index 4aed756af4..9b3af24a28 100644 --- a/projects/roctracer/src/core/tracker.h +++ b/projects/roctracer/src/core/tracker.h @@ -28,7 +28,6 @@ #include -#include "util/hsa_rsrc_factory.h" #include "util/exception.h" #include "util/logger.h" @@ -114,14 +113,7 @@ class Tracker { entry->begin = async_copy_time.start * sysclock_period; entry->end = async_copy_time.end * sysclock_period; } else { - hsa_amd_profiling_dispatch_time_t dispatch_time{}; - hsa_status_t status = - hsa_amd_profiling_get_dispatch_time(entry->agent, entry->signal, &dispatch_time); - if (status != HSA_STATUS_SUCCESS) - EXC_RAISING(ROCTRACER_STATUS_ERROR, "hsa_amd_profiling_get_dispatch_time failed"); - entry->begin = dispatch_time.start * sysclock_period; - entry->end = dispatch_time.end * sysclock_period; - entry->dev_index = ::util::HsaRsrcFactory::Instance().GetAgentInfo(entry->agent)->dev_index; + assert(false && "should not reach here"); } hsa_signal_t orig = entry->orig; diff --git a/projects/roctracer/src/util/hsa_rsrc_factory.cpp b/projects/roctracer/src/util/hsa_rsrc_factory.cpp deleted file mode 100644 index 0a70b07fc3..0000000000 --- a/projects/roctracer/src/util/hsa_rsrc_factory.cpp +++ /dev/null @@ -1,807 +0,0 @@ -/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. */ - -#include "util/hsa_rsrc_factory.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -namespace util { - -// Callback function to get available in the system agents -hsa_status_t HsaRsrcFactory::GetHsaAgentsCallback(hsa_agent_t agent, void* data) { - hsa_status_t status = HSA_STATUS_ERROR; - HsaRsrcFactory* hsa_rsrc = reinterpret_cast(data); - const AgentInfo* agent_info = hsa_rsrc->AddAgentInfo(agent); - if (agent_info != NULL) status = HSA_STATUS_SUCCESS; - return status; -} - -// This function checks to see if the provided -// pool has the HSA_AMD_SEGMENT_GLOBAL property. If the kern_arg flag is true, -// the function adds an additional requirement that the pool have the -// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT property. If kern_arg is false, -// pools must NOT have this property. -// Upon finding a pool that meets these conditions, HSA_STATUS_INFO_BREAK is -// returned. HSA_STATUS_SUCCESS is returned if no errors were encountered, but -// no pool was found meeting the requirements. If an error is encountered, we -// return that error. -static hsa_status_t FindGlobalPool(hsa_amd_memory_pool_t pool, void* data, bool kern_arg) { - hsa_status_t err; - hsa_amd_segment_t segment; - uint32_t flag; - - if (nullptr == data) { - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - - err = HsaRsrcFactory::HsaApi()->hsa_amd_memory_pool_get_info( - pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment); - CHECK_STATUS("hsa_amd_memory_pool_get_info", err); - if (HSA_AMD_SEGMENT_GLOBAL != segment) { - return HSA_STATUS_SUCCESS; - } - - err = HsaRsrcFactory::HsaApi()->hsa_amd_memory_pool_get_info( - pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag); - CHECK_STATUS("hsa_amd_memory_pool_get_info", err); - - uint32_t karg_st = flag & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT; - - if ((karg_st == 0 && kern_arg) || (karg_st != 0 && !kern_arg)) { - return HSA_STATUS_SUCCESS; - } - - *(reinterpret_cast(data)) = pool; - return HSA_STATUS_INFO_BREAK; -} - -// This is the call-back function for hsa_amd_agent_iterate_memory_pools() that -// finds a pool with the properties of HSA_AMD_SEGMENT_GLOBAL and that is NOT -// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT -hsa_status_t FindStandardPool(hsa_amd_memory_pool_t pool, void* data) { - return FindGlobalPool(pool, data, false); -} - -// This is the call-back function for hsa_amd_agent_iterate_memory_pools() that -// finds a pool with the properties of HSA_AMD_SEGMENT_GLOBAL and that IS -// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT -hsa_status_t FindKernArgPool(hsa_amd_memory_pool_t pool, void* data) { - return FindGlobalPool(pool, data, true); -} - -// Constructor of the class -HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize_hsa) { - hsa_status_t status; - - cpu_pool_ = NULL; - kern_arg_pool_ = NULL; - - InitHsaApiTable(NULL); - - // Initialize the Hsa Runtime - if (initialize_hsa_) { - status = hsa_api_.hsa_init(); - CHECK_STATUS("Error in hsa_init", status); - } - - // Discover the set of Gpu devices available on the platform - status = hsa_api_.hsa_iterate_agents(GetHsaAgentsCallback, this); - CHECK_STATUS("Error Calling hsa_iterate_agents", status); - if (cpu_pool_ == NULL) CHECK_STATUS("CPU memory pool is not found", HSA_STATUS_ERROR); - if (kern_arg_pool_ == NULL) CHECK_STATUS("Kern-arg memory pool is not found", HSA_STATUS_ERROR); - - // Get AqlProfile API table - aqlprofile_api_ = {0}; -#ifdef ROCP_LD_AQLPROFILE - status = LoadAqlProfileLib(&aqlprofile_api_); -#else - status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, - hsa_ven_amd_aqlprofile_VERSION_MAJOR, - sizeof(aqlprofile_api_), &aqlprofile_api_); -#endif - CHECK_STATUS("aqlprofile API table load failed", status); - - // Get Loader API table - loader_api_ = {0}; - status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_LOADER, 1, - sizeof(loader_api_), &loader_api_); - CHECK_STATUS("loader API table query failed", status); - - // Instantiate HSA timer - timer_ = new HsaTimer(&hsa_api_); - CHECK_STATUS("HSA timer allocation failed", - (timer_ == NULL) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS); - - // Time correlation - const uint32_t corr_iters = 1000; - CorrelateTime(HsaTimer::TIME_ID_CLOCK_REALTIME, corr_iters); - CorrelateTime(HsaTimer::TIME_ID_CLOCK_MONOTONIC, corr_iters); - - // System timeout - timeout_ = - (timeout_ns_ == HsaTimer::TIMESTAMP_MAX) ? timeout_ns_ : timer_->ns_to_sysclock(timeout_ns_); -} - -// Destructor of the class -HsaRsrcFactory::~HsaRsrcFactory() { - delete timer_; - for (auto p : cpu_list_) delete p; - for (auto p : gpu_list_) delete p; - if (initialize_hsa_) { - hsa_status_t status = hsa_api_.hsa_shut_down(); - CHECK_STATUS("Error in hsa_shut_down", status); - } -} - -void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) { - std::lock_guard lck(mutex_); - - if (hsa_api_.hsa_init == NULL) { - if (table != NULL) { - hsa_api_.hsa_init = table->core_->hsa_init_fn; - hsa_api_.hsa_shut_down = table->core_->hsa_shut_down_fn; - hsa_api_.hsa_agent_get_info = table->core_->hsa_agent_get_info_fn; - hsa_api_.hsa_iterate_agents = table->core_->hsa_iterate_agents_fn; - - hsa_api_.hsa_queue_create = table->core_->hsa_queue_create_fn; - hsa_api_.hsa_queue_destroy = table->core_->hsa_queue_destroy_fn; - hsa_api_.hsa_queue_load_write_index_relaxed = - table->core_->hsa_queue_load_write_index_relaxed_fn; - hsa_api_.hsa_queue_store_write_index_relaxed = - table->core_->hsa_queue_store_write_index_relaxed_fn; - hsa_api_.hsa_queue_load_read_index_relaxed = - table->core_->hsa_queue_load_read_index_relaxed_fn; - - hsa_api_.hsa_signal_create = table->core_->hsa_signal_create_fn; - hsa_api_.hsa_signal_destroy = table->core_->hsa_signal_destroy_fn; - hsa_api_.hsa_signal_load_relaxed = table->core_->hsa_signal_load_relaxed_fn; - hsa_api_.hsa_signal_store_relaxed = table->core_->hsa_signal_store_relaxed_fn; - hsa_api_.hsa_signal_wait_scacquire = table->core_->hsa_signal_wait_scacquire_fn; - hsa_api_.hsa_signal_store_screlease = table->core_->hsa_signal_store_screlease_fn; - - hsa_api_.hsa_code_object_reader_create_from_file = - table->core_->hsa_code_object_reader_create_from_file_fn; - hsa_api_.hsa_executable_create_alt = table->core_->hsa_executable_create_alt_fn; - hsa_api_.hsa_executable_load_agent_code_object = - table->core_->hsa_executable_load_agent_code_object_fn; - hsa_api_.hsa_executable_freeze = table->core_->hsa_executable_freeze_fn; - hsa_api_.hsa_executable_get_symbol = table->core_->hsa_executable_get_symbol_fn; - hsa_api_.hsa_executable_symbol_get_info = table->core_->hsa_executable_symbol_get_info_fn; - hsa_api_.hsa_executable_iterate_symbols = table->core_->hsa_executable_iterate_symbols_fn; - - hsa_api_.hsa_system_get_info = table->core_->hsa_system_get_info_fn; - hsa_api_.hsa_system_get_major_extension_table = - table->core_->hsa_system_get_major_extension_table_fn; - - hsa_api_.hsa_amd_agent_iterate_memory_pools = - table->amd_ext_->hsa_amd_agent_iterate_memory_pools_fn; - hsa_api_.hsa_amd_memory_pool_get_info = table->amd_ext_->hsa_amd_memory_pool_get_info_fn; - hsa_api_.hsa_amd_memory_pool_allocate = table->amd_ext_->hsa_amd_memory_pool_allocate_fn; - hsa_api_.hsa_amd_agents_allow_access = table->amd_ext_->hsa_amd_agents_allow_access_fn; - hsa_api_.hsa_amd_memory_async_copy = table->amd_ext_->hsa_amd_memory_async_copy_fn; - hsa_api_.hsa_amd_memory_async_copy_rect = table->amd_ext_->hsa_amd_memory_async_copy_rect_fn; - - hsa_api_.hsa_amd_signal_async_handler = table->amd_ext_->hsa_amd_signal_async_handler_fn; - hsa_api_.hsa_amd_profiling_set_profiler_enabled = - table->amd_ext_->hsa_amd_profiling_set_profiler_enabled_fn; - hsa_api_.hsa_amd_profiling_get_async_copy_time = - table->amd_ext_->hsa_amd_profiling_get_async_copy_time_fn; - hsa_api_.hsa_amd_profiling_get_dispatch_time = - table->amd_ext_->hsa_amd_profiling_get_dispatch_time_fn; - } else { - hsa_api_.hsa_init = hsa_init; - hsa_api_.hsa_shut_down = hsa_shut_down; - hsa_api_.hsa_agent_get_info = hsa_agent_get_info; - hsa_api_.hsa_iterate_agents = hsa_iterate_agents; - - hsa_api_.hsa_queue_create = hsa_queue_create; - hsa_api_.hsa_queue_destroy = hsa_queue_destroy; - hsa_api_.hsa_queue_load_write_index_relaxed = hsa_queue_load_write_index_relaxed; - hsa_api_.hsa_queue_store_write_index_relaxed = hsa_queue_store_write_index_relaxed; - hsa_api_.hsa_queue_load_read_index_relaxed = hsa_queue_load_read_index_relaxed; - - hsa_api_.hsa_signal_create = hsa_signal_create; - hsa_api_.hsa_signal_destroy = hsa_signal_destroy; - hsa_api_.hsa_signal_load_relaxed = hsa_signal_load_relaxed; - hsa_api_.hsa_signal_store_relaxed = hsa_signal_store_relaxed; - hsa_api_.hsa_signal_wait_scacquire = hsa_signal_wait_scacquire; - hsa_api_.hsa_signal_store_screlease = hsa_signal_store_screlease; - - hsa_api_.hsa_code_object_reader_create_from_file = hsa_code_object_reader_create_from_file; - hsa_api_.hsa_executable_create_alt = hsa_executable_create_alt; - hsa_api_.hsa_executable_load_agent_code_object = hsa_executable_load_agent_code_object; - hsa_api_.hsa_executable_freeze = hsa_executable_freeze; - hsa_api_.hsa_executable_get_symbol = hsa_executable_get_symbol; - hsa_api_.hsa_executable_symbol_get_info = hsa_executable_symbol_get_info; - hsa_api_.hsa_executable_iterate_symbols = hsa_executable_iterate_symbols; - - hsa_api_.hsa_system_get_info = hsa_system_get_info; - hsa_api_.hsa_system_get_major_extension_table = hsa_system_get_major_extension_table; - - hsa_api_.hsa_amd_agent_iterate_memory_pools = hsa_amd_agent_iterate_memory_pools; - hsa_api_.hsa_amd_memory_pool_get_info = hsa_amd_memory_pool_get_info; - hsa_api_.hsa_amd_memory_pool_allocate = hsa_amd_memory_pool_allocate; - hsa_api_.hsa_amd_agents_allow_access = hsa_amd_agents_allow_access; - hsa_api_.hsa_amd_memory_async_copy = hsa_amd_memory_async_copy; - hsa_api_.hsa_amd_memory_async_copy_rect = hsa_amd_memory_async_copy_rect; - - hsa_api_.hsa_amd_signal_async_handler = hsa_amd_signal_async_handler; - hsa_api_.hsa_amd_profiling_set_profiler_enabled = hsa_amd_profiling_set_profiler_enabled; - hsa_api_.hsa_amd_profiling_get_async_copy_time = hsa_amd_profiling_get_async_copy_time; - hsa_api_.hsa_amd_profiling_get_dispatch_time = hsa_amd_profiling_get_dispatch_time; - } - } -} - -hsa_status_t HsaRsrcFactory::LoadAqlProfileLib(aqlprofile_pfn_t* api) { - void* handle = dlopen(kAqlProfileLib, RTLD_NOW); - if (handle == NULL) { - fprintf(stderr, "Loading '%s' failed, %s\n", kAqlProfileLib, dlerror()); - return HSA_STATUS_ERROR; - } - dlerror(); /* Clear any existing error */ - - api->hsa_ven_amd_aqlprofile_error_string = - (decltype(::hsa_ven_amd_aqlprofile_error_string)*)dlsym( - handle, "hsa_ven_amd_aqlprofile_error_string"); - api->hsa_ven_amd_aqlprofile_validate_event = - (decltype(::hsa_ven_amd_aqlprofile_validate_event)*)dlsym( - handle, "hsa_ven_amd_aqlprofile_validate_event"); - api->hsa_ven_amd_aqlprofile_start = - (decltype(::hsa_ven_amd_aqlprofile_start)*)dlsym(handle, "hsa_ven_amd_aqlprofile_start"); - api->hsa_ven_amd_aqlprofile_stop = - (decltype(::hsa_ven_amd_aqlprofile_stop)*)dlsym(handle, "hsa_ven_amd_aqlprofile_stop"); -#ifdef AQLPROF_NEW_API - api->hsa_ven_amd_aqlprofile_read = - (decltype(::hsa_ven_amd_aqlprofile_read)*)dlsym(handle, "hsa_ven_amd_aqlprofile_read"); -#endif - api->hsa_ven_amd_aqlprofile_legacy_get_pm4 = - (decltype(::hsa_ven_amd_aqlprofile_legacy_get_pm4)*)dlsym( - handle, "hsa_ven_amd_aqlprofile_legacy_get_pm4"); - api->hsa_ven_amd_aqlprofile_get_info = (decltype(::hsa_ven_amd_aqlprofile_get_info)*)dlsym( - handle, "hsa_ven_amd_aqlprofile_get_info"); - api->hsa_ven_amd_aqlprofile_iterate_data = - (decltype(::hsa_ven_amd_aqlprofile_iterate_data)*)dlsym( - handle, "hsa_ven_amd_aqlprofile_iterate_data"); - - return HSA_STATUS_SUCCESS; -} - -// Add system agent info -const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) { - // Determine if device is a Gpu agent - hsa_status_t status; - AgentInfo* agent_info = NULL; - - hsa_device_type_t type; - status = hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type); - CHECK_STATUS("Error Calling hsa_agent_get_info", status); - - if (type == HSA_DEVICE_TYPE_CPU) { - agent_info = new AgentInfo{}; - agent_info->dev_id = agent; - agent_info->dev_type = HSA_DEVICE_TYPE_CPU; - agent_info->dev_index = cpu_list_.size(); - - status = - hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->cpu_pool); - if ((status == HSA_STATUS_INFO_BREAK) && (cpu_pool_ == NULL)) cpu_pool_ = &agent_info->cpu_pool; - status = hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindKernArgPool, - &agent_info->kern_arg_pool); - if ((status == HSA_STATUS_INFO_BREAK) && (kern_arg_pool_ == NULL)) - kern_arg_pool_ = &agent_info->kern_arg_pool; - agent_info->gpu_pool = {}; - - cpu_list_.push_back(agent_info); - cpu_agents_.push_back(agent); - } - - if (type == HSA_DEVICE_TYPE_GPU) { - agent_info = new AgentInfo{}; - agent_info->dev_id = agent; - agent_info->dev_type = HSA_DEVICE_TYPE_GPU; - hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_info->name); - const int gfxip_label_len = - std::min(strlen(agent_info->name) - 2, sizeof(agent_info->gfxip) - 1); - memcpy(agent_info->gfxip, agent_info->name, gfxip_label_len); - agent_info->gfxip[gfxip_label_len] = '\0'; - hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &agent_info->max_wave_size); - hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &agent_info->max_queue_size); - hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_info->profile); - agent_info->is_apu = (agent_info->profile == HSA_PROFILE_FULL) ? true : false; - hsa_api_.hsa_agent_get_info( - agent, static_cast(HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT), - &agent_info->cu_num); - hsa_api_.hsa_agent_get_info(agent, - static_cast(HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU), - &agent_info->waves_per_cu); - hsa_api_.hsa_agent_get_info(agent, - static_cast(HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU), - &agent_info->simds_per_cu); - hsa_api_.hsa_agent_get_info( - agent, static_cast(HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES), - &agent_info->se_num); - hsa_api_.hsa_agent_get_info( - agent, static_cast(HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE), - &agent_info->shader_arrays_per_se); - - agent_info->cpu_pool = {}; - agent_info->kern_arg_pool = {}; - status = - hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->gpu_pool); - CHECK_ITER_STATUS("hsa_amd_agent_iterate_memory_pools(gpu pool)", status); - - // GFX8 and GFX9 SGPR/VGPR block sizes - agent_info->sgpr_block_dflt = (strcmp(agent_info->gfxip, "gfx8") == 0) ? 1 : 2; - agent_info->sgpr_block_size = 8; - agent_info->vgpr_block_size = 4; - - // Set GPU index - agent_info->dev_index = gpu_list_.size(); - gpu_list_.push_back(agent_info); - gpu_agents_.push_back(agent); - } - - if (agent_info) agent_map_[agent.handle] = agent_info; - - return agent_info; -} - -// Return systen agent info -const AgentInfo* HsaRsrcFactory::GetAgentInfo(const hsa_agent_t agent) { - const AgentInfo* agent_info = NULL; - auto it = agent_map_.find(agent.handle); - if (it != agent_map_.end()) { - agent_info = it->second; - } - return agent_info; -} - -// Get the count of Hsa Gpu Agents available on the platform -// -// @return uint32_t Number of Gpu agents on platform -// -uint32_t HsaRsrcFactory::GetCountOfGpuAgents() { return uint32_t(gpu_list_.size()); } - -// Get the count of Hsa Cpu Agents available on the platform -// -// @return uint32_t Number of Cpu agents on platform -// -uint32_t HsaRsrcFactory::GetCountOfCpuAgents() { return uint32_t(cpu_list_.size()); } - -// Get the AgentInfo handle of a Gpu device -// -// @param idx Gpu Agent at specified index -// -// @param agent_info Output parameter updated with AgentInfo -// -// @return bool true if successful, false otherwise -// -bool HsaRsrcFactory::GetGpuAgentInfo(uint32_t idx, const AgentInfo** agent_info) { - // Determine if request is valid - uint32_t size = uint32_t(gpu_list_.size()); - if (idx >= size) { - return false; - } - - // Copy AgentInfo from specified index - *agent_info = gpu_list_[idx]; - - return true; -} - -// Get the AgentInfo handle of a Cpu device -// -// @param idx Cpu Agent at specified index -// -// @param agent_info Output parameter updated with AgentInfo -// -// @return bool true if successful, false otherwise -// -bool HsaRsrcFactory::GetCpuAgentInfo(uint32_t idx, const AgentInfo** agent_info) { - // Determine if request is valid - uint32_t size = uint32_t(cpu_list_.size()); - if (idx >= size) { - return false; - } - - // Copy AgentInfo from specified index - *agent_info = cpu_list_[idx]; - return true; -} - -// Create a Queue object and return its handle. The queue object is expected -// to support user requested number of Aql dispatch packets. -// -// @param agent_info Gpu Agent on which to create a queue object -// -// @param num_Pkts Number of packets to be held by queue -// -// @param queue Output parameter updated with handle of queue object -// -// @return bool true if successful, false otherwise -// -bool HsaRsrcFactory::CreateQueue(const AgentInfo* agent_info, uint32_t num_pkts, - hsa_queue_t** queue) { - hsa_status_t status; - status = hsa_api_.hsa_queue_create(agent_info->dev_id, num_pkts, HSA_QUEUE_TYPE_MULTI, NULL, NULL, - UINT32_MAX, UINT32_MAX, queue); - return (status == HSA_STATUS_SUCCESS); -} - -// Create a Signal object and return its handle. -// @param value Initial value of signal object -// @param signal Output parameter updated with handle of signal object -// @return bool true if successful, false otherwise -bool HsaRsrcFactory::CreateSignal(uint32_t value, hsa_signal_t* signal) { - hsa_status_t status; - status = hsa_api_.hsa_signal_create(value, 0, NULL, signal); - return (status == HSA_STATUS_SUCCESS); -} - -// Allocate memory for use by a kernel of specified size in specified -// agent's memory region. -// @param agent_info Agent from whose memory region to allocate -// @param size Size of memory in terms of bytes -// @return uint8_t* Pointer to buffer, null if allocation fails. -uint8_t* HsaRsrcFactory::AllocateLocalMemory(const AgentInfo* agent_info, size_t size) { - hsa_status_t status = HSA_STATUS_ERROR; - uint8_t* buffer = NULL; - size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; - status = hsa_api_.hsa_amd_memory_pool_allocate(agent_info->gpu_pool, size, 0, - reinterpret_cast(&buffer)); - uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL; - return ptr; -} - -// Allocate memory to pass kernel parameters. -// Memory is alocated accessible for all CPU agents and for GPU given by AgentInfo parameter. -// @param agent_info Agent from whose memory region to allocate -// @param size Size of memory in terms of bytes -// @return uint8_t* Pointer to buffer, null if allocation fails. -uint8_t* HsaRsrcFactory::AllocateKernArgMemory(const AgentInfo* agent_info, size_t size) { - hsa_status_t status = HSA_STATUS_ERROR; - uint8_t* buffer = NULL; - if (!cpu_agents_.empty()) { - size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; - status = hsa_api_.hsa_amd_memory_pool_allocate(*kern_arg_pool_, size, 0, - reinterpret_cast(&buffer)); - // Both the CPU and GPU can access the kernel arguments - if (status == HSA_STATUS_SUCCESS) { - hsa_agent_t ag_list[1] = {agent_info->dev_id}; - status = hsa_api_.hsa_amd_agents_allow_access(1, ag_list, NULL, buffer); - } - } - uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL; - return ptr; -} - -// Allocate system memory accessible by both CPU and GPU -// @param agent_info Agent from whose memory region to allocate -// @param size Size of memory in terms of bytes -// @return uint8_t* Pointer to buffer, null if allocation fails. -uint8_t* HsaRsrcFactory::AllocateSysMemory(const AgentInfo* agent_info, size_t size) { - hsa_status_t status = HSA_STATUS_ERROR; - uint8_t* buffer = NULL; - size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; - if (!cpu_agents_.empty()) { - status = hsa_api_.hsa_amd_memory_pool_allocate(*cpu_pool_, size, 0, - reinterpret_cast(&buffer)); - // Both the CPU and GPU can access the memory - if (status == HSA_STATUS_SUCCESS) { - hsa_agent_t ag_list[1] = {agent_info->dev_id}; - status = hsa_api_.hsa_amd_agents_allow_access(1, ag_list, NULL, buffer); - } - } - uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL; - return ptr; -} - -// Allocate memory for command buffer. -// @param agent_info Agent from whose memory region to allocate -// @param size Size of memory in terms of bytes -// @return uint8_t* Pointer to buffer, null if allocation fails. -uint8_t* HsaRsrcFactory::AllocateCmdMemory(const AgentInfo* agent_info, size_t size) { - size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; - uint8_t* ptr = (agent_info->is_apu && CMD_MEMORY_MMAP) - ? reinterpret_cast( - mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED | MAP_ANONYMOUS, 0, 0)) - : AllocateSysMemory(agent_info, size); - return ptr; -} - -// Wait signal -hsa_signal_value_t HsaRsrcFactory::SignalWait(const hsa_signal_t& signal, - const hsa_signal_value_t& signal_value) const { - const hsa_signal_value_t exp_value = signal_value - 1; - hsa_signal_value_t ret_value = signal_value; - while (1) { - ret_value = hsa_api_.hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, signal_value, - timeout_, HSA_WAIT_STATE_BLOCKED); - if (ret_value == exp_value) break; - if (ret_value != signal_value) { - std::cerr << "Error: HsaRsrcFactory::SignalWait: signal_value(" << signal_value - << "), ret_value(" << ret_value << ")" << std::endl - << std::flush; - abort(); - } - } - return ret_value; -} - -// Wait signal with signal value restore -void HsaRsrcFactory::SignalWaitRestore(const hsa_signal_t& signal, - const hsa_signal_value_t& signal_value) const { - SignalWait(signal, signal_value); - hsa_api_.hsa_signal_store_relaxed(const_cast(signal), signal_value); -} - -// Copy data from GPU to host memory -bool HsaRsrcFactory::Memcpy(const hsa_agent_t& agent, void* dst, const void* src, size_t size) { - hsa_status_t status = HSA_STATUS_ERROR; - if (!cpu_agents_.empty()) { - hsa_signal_t s = {}; - status = hsa_api_.hsa_signal_create(1, 0, NULL, &s); - CHECK_STATUS("hsa_signal_create()", status); - status = hsa_api_.hsa_amd_memory_async_copy(dst, cpu_agents_[0], src, agent, size, 0, NULL, s); - CHECK_STATUS("hsa_amd_memory_async_copy()", status); - SignalWait(s, 1); - status = hsa_api_.hsa_signal_destroy(s); - CHECK_STATUS("hsa_signal_destroy()", status); - } - return (status == HSA_STATUS_SUCCESS); -} -bool HsaRsrcFactory::Memcpy(const AgentInfo* agent_info, void* dst, const void* src, size_t size) { - return Memcpy(agent_info->dev_id, dst, src, size); -} - -// Memory free method -bool HsaRsrcFactory::FreeMemory(void* ptr) { - const hsa_status_t status = hsa_memory_free(ptr); - CHECK_STATUS("hsa_memory_free", status); - return (status == HSA_STATUS_SUCCESS); -} - -// Loads an Assembled Brig file and Finalizes it into Device Isa -// @param agent_info Gpu device for which to finalize -// @param brig_path File path of the Assembled Brig file -// @param kernel_name Name of the kernel to finalize -// @param code_desc Handle of finalized Code Descriptor that could -// be used to submit for execution -// @return bool true if successful, false otherwise -bool HsaRsrcFactory::LoadAndFinalize(const AgentInfo* agent_info, const char* brig_path, - const char* kernel_name, hsa_executable_t* executable, - hsa_executable_symbol_t* code_desc) { - hsa_status_t status = HSA_STATUS_ERROR; - - // Build the code object filename - std::string filename(brig_path); - std::clog << "Code object filename: " << filename << std::endl; - - // Open the file containing code object - hsa_file_t file_handle = open(filename.c_str(), O_RDONLY); - if (file_handle == -1) { - std::cerr << "Error: failed to load '" << filename << "'" << std::endl; - assert(false); - return false; - } - - // Create code object reader - hsa_code_object_reader_t code_obj_rdr = {0}; - status = hsa_api_.hsa_code_object_reader_create_from_file(file_handle, &code_obj_rdr); - if (status != HSA_STATUS_SUCCESS) { - std::cerr << "Failed to create code object reader '" << filename << "'" << std::endl; - return false; - } - - // Create executable. - status = hsa_api_.hsa_executable_create_alt( - HSA_PROFILE_FULL, HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, NULL, executable); - CHECK_STATUS("Error in creating executable object", status); - - // Load code object. - status = hsa_api_.hsa_executable_load_agent_code_object(*executable, agent_info->dev_id, - code_obj_rdr, NULL, NULL); - CHECK_STATUS("Error in loading executable object", status); - - // Freeze executable. - status = hsa_api_.hsa_executable_freeze(*executable, ""); - CHECK_STATUS("Error in freezing executable object", status); - - // Get symbol handle. - hsa_executable_symbol_t kernelSymbol; - status = hsa_api_.hsa_executable_get_symbol(*executable, NULL, kernel_name, agent_info->dev_id, 0, - &kernelSymbol); - CHECK_STATUS("Error in looking up kernel symbol", status); - - // Update output parameter - *code_desc = kernelSymbol; - return true; -} - -// Print the various fields of Hsa Gpu Agents -bool HsaRsrcFactory::PrintGpuAgents(const std::string& header) { - std::cout << std::flush; - std::clog << header << " :" << std::endl; - - const AgentInfo* agent_info; - int size = uint32_t(gpu_list_.size()); - for (int idx = 0; idx < size; idx++) { - agent_info = gpu_list_[idx]; - - std::clog << "> agent[" << idx << "] :" << std::endl; - std::clog << ">> Name : " << agent_info->name << std::endl; - std::clog << ">> APU : " << agent_info->is_apu << std::endl; - std::clog << ">> HSAIL profile : " << agent_info->profile << std::endl; - std::clog << ">> Max Wave Size : " << agent_info->max_wave_size << std::endl; - std::clog << ">> Max Queue Size : " << agent_info->max_queue_size << std::endl; - std::clog << ">> CU number : " << agent_info->cu_num << std::endl; - std::clog << ">> Waves per CU : " << agent_info->waves_per_cu << std::endl; - std::clog << ">> SIMDs per CU : " << agent_info->simds_per_cu << std::endl; - std::clog << ">> SE number : " << agent_info->se_num << std::endl; - std::clog << ">> Shader Arrays per SE : " << agent_info->shader_arrays_per_se << std::endl; - } - return true; -} - -uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet) { - const uint32_t slot_size_b = CMD_SLOT_SIZE_B; - - // adevance command queue - const uint64_t write_idx = hsa_api_.hsa_queue_load_write_index_relaxed(queue); - hsa_api_.hsa_queue_store_write_index_relaxed(queue, write_idx + 1); - while ((write_idx - hsa_api_.hsa_queue_load_read_index_relaxed(queue)) >= queue->size) { - sched_yield(); - } - - uint32_t slot_idx = (uint32_t)(write_idx % queue->size); - uint32_t* queue_slot = - reinterpret_cast((uintptr_t)(queue->base_address) + (slot_idx * slot_size_b)); - const uint32_t* slot_data = reinterpret_cast(packet); - - // Copy buffered commands into the queue slot. - // Overwrite the AQL invalid header (first dword) last. - // This prevents the slot from being read until it's fully written. - memcpy(&queue_slot[1], &slot_data[1], slot_size_b - sizeof(uint32_t)); - std::atomic* header_atomic_ptr = - reinterpret_cast*>(&queue_slot[0]); - header_atomic_ptr->store(slot_data[0], std::memory_order_release); - - // ringdoor bell - hsa_api_.hsa_signal_store_relaxed(queue->doorbell_signal, write_idx); - - return write_idx; -} - -uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet, size_t size_bytes) { - const uint32_t slot_size_b = CMD_SLOT_SIZE_B; - if ((size_bytes & (slot_size_b - 1)) != 0) { - fprintf(stderr, "HsaRsrcFactory::Submit: Bad packet size %zx\n", size_bytes); - abort(); - } - - const char* begin = reinterpret_cast(packet); - const char* end = begin + size_bytes; - uint64_t write_idx = 0; - for (const char* ptr = begin; ptr < end; ptr += slot_size_b) { - write_idx = Submit(queue, ptr); - } - - return write_idx; -} - -const char* HsaRsrcFactory::GetKernelName(uint64_t addr) { - std::lock_guard lck(mutex_); - const auto it = symbols_map_->find(addr); - if (it == symbols_map_->end()) { - fprintf(stderr, "HsaRsrcFactory::kernel addr (0x%lx) is not found\n", addr); - abort(); - } - return strdup(it->second); -} - -void HsaRsrcFactory::EnableExecutableTracking(HsaApiTable* table) { - std::lock_guard lck(mutex_); - executable_tracking_on_ = true; - table->core_->hsa_executable_freeze_fn = hsa_executable_freeze_interceptor; -} - -hsa_status_t HsaRsrcFactory::executable_symbols_cb(hsa_executable_t exec, - hsa_executable_symbol_t symbol, void* data) { - hsa_symbol_kind_t value = (hsa_symbol_kind_t)0; - hsa_status_t status = - hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &value); - CHECK_STATUS("Error in getting symbol info", status); - if (value == HSA_SYMBOL_KIND_KERNEL) { - uint64_t addr = 0; - uint32_t len = 0; - status = hsa_api_.hsa_executable_symbol_get_info( - symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &addr); - CHECK_STATUS("Error in getting kernel object", status); - status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, - &len); - CHECK_STATUS("Error in getting name len", status); - char* name = new char[len + 1]; - status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME, name); - CHECK_STATUS("Error in getting kernel name", status); - name[len] = 0; - auto ret = symbols_map_->insert({addr, name}); - if (ret.second == false) { - delete[] ret.first->second; - ret.first->second = name; - } - } - return HSA_STATUS_SUCCESS; -} - -hsa_status_t HsaRsrcFactory::hsa_executable_freeze_interceptor(hsa_executable_t executable, - const char* options) { - std::lock_guard lck(mutex_); - if (symbols_map_ == NULL) symbols_map_ = new symbols_map_t; - hsa_status_t status = - hsa_api_.hsa_executable_iterate_symbols(executable, executable_symbols_cb, NULL); - CHECK_STATUS("Error in iterating executable symbols", status); - return hsa_api_.hsa_executable_freeze(executable, options); - ; -} - -void HsaRsrcFactory::DumpHandles(FILE* file) { - auto beg = agent_map_.begin(); - auto end = agent_map_.end(); - for (auto it = beg; it != end; ++it) { - const AgentInfo* agent_info = it->second; - fprintf(file, "0x%lx agent %s\n", agent_info->dev_id.handle, - (agent_info->dev_type == HSA_DEVICE_TYPE_CPU) ? "cpu" : "gpu"); - if (agent_info->cpu_pool.handle != 0) - fprintf(file, "0x%lx pool cpu\n", agent_info->cpu_pool.handle); - if (agent_info->kern_arg_pool.handle != 0) - fprintf(file, "0x%lx pool cpu kernarg\n", agent_info->kern_arg_pool.handle); - if (agent_info->gpu_pool.handle != 0) - fprintf(file, "0x%lx pool gpu%u\n", agent_info->gpu_pool.handle, agent_info->dev_index); - } - fflush(file); -} - -std::atomic HsaRsrcFactory::instance_{}; -HsaRsrcFactory::mutex_t HsaRsrcFactory::mutex_; -HsaRsrcFactory::timestamp_t HsaRsrcFactory::timeout_ns_ = HsaTimer::TIMESTAMP_MAX; -hsa_pfn_t HsaRsrcFactory::hsa_api_{}; -bool HsaRsrcFactory::executable_tracking_on_ = false; -HsaRsrcFactory::symbols_map_t* HsaRsrcFactory::symbols_map_ = NULL; - -} // namespace util diff --git a/projects/roctracer/src/util/hsa_rsrc_factory.h b/projects/roctracer/src/util/hsa_rsrc_factory.h deleted file mode 100644 index 325a3c9042..0000000000 --- a/projects/roctracer/src/util/hsa_rsrc_factory.h +++ /dev/null @@ -1,520 +0,0 @@ -/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. */ - -#ifndef SRC_UTIL_HSA_RSRC_FACTORY_H_ -#define SRC_UTIL_HSA_RSRC_FACTORY_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#define HSA_ARGUMENT_ALIGN_BYTES 16 -#define HSA_QUEUE_ALIGN_BYTES 64 -#define HSA_PACKET_ALIGN_BYTES 64 - -#define CHECK_STATUS(msg, status) \ - do { \ - if ((status) != HSA_STATUS_SUCCESS) { \ - const char* emsg = 0; \ - hsa_status_string(status, &emsg); \ - printf("%s: %s\n", msg, emsg ? emsg : ""); \ - abort(); \ - } \ - } while (0) - -#define CHECK_ITER_STATUS(msg, status) \ - do { \ - if ((status) != HSA_STATUS_INFO_BREAK) { \ - const char* emsg = 0; \ - hsa_status_string(status, &emsg); \ - printf("%s: %s\n", msg, emsg ? emsg : ""); \ - abort(); \ - } \ - } while (0) - -namespace util { -static const size_t MEM_PAGE_BYTES = 0x1000; -static const size_t MEM_PAGE_MASK = MEM_PAGE_BYTES - 1; -typedef decltype(hsa_agent_t::handle) hsa_agent_handle_t; - -struct hsa_pfn_t { - decltype(hsa_init)* hsa_init; - decltype(hsa_shut_down)* hsa_shut_down; - decltype(hsa_agent_get_info)* hsa_agent_get_info; - decltype(hsa_iterate_agents)* hsa_iterate_agents; - - decltype(hsa_queue_create)* hsa_queue_create; - decltype(hsa_queue_destroy)* hsa_queue_destroy; - decltype(hsa_queue_load_write_index_relaxed)* hsa_queue_load_write_index_relaxed; - decltype(hsa_queue_store_write_index_relaxed)* hsa_queue_store_write_index_relaxed; - decltype(hsa_queue_load_read_index_relaxed)* hsa_queue_load_read_index_relaxed; - - decltype(hsa_signal_create)* hsa_signal_create; - decltype(hsa_signal_destroy)* hsa_signal_destroy; - decltype(hsa_signal_load_relaxed)* hsa_signal_load_relaxed; - decltype(hsa_signal_store_relaxed)* hsa_signal_store_relaxed; - decltype(hsa_signal_wait_scacquire)* hsa_signal_wait_scacquire; - decltype(hsa_signal_store_screlease)* hsa_signal_store_screlease; - - decltype(hsa_code_object_reader_create_from_file)* hsa_code_object_reader_create_from_file; - decltype(hsa_executable_create_alt)* hsa_executable_create_alt; - decltype(hsa_executable_load_agent_code_object)* hsa_executable_load_agent_code_object; - decltype(hsa_executable_freeze)* hsa_executable_freeze; - decltype(hsa_executable_get_symbol)* hsa_executable_get_symbol; - decltype(hsa_executable_symbol_get_info)* hsa_executable_symbol_get_info; - decltype(hsa_executable_iterate_symbols)* hsa_executable_iterate_symbols; - - decltype(hsa_system_get_info)* hsa_system_get_info; - decltype(hsa_system_get_major_extension_table)* hsa_system_get_major_extension_table; - - decltype(hsa_amd_agent_iterate_memory_pools)* hsa_amd_agent_iterate_memory_pools; - decltype(hsa_amd_memory_pool_get_info)* hsa_amd_memory_pool_get_info; - decltype(hsa_amd_memory_pool_allocate)* hsa_amd_memory_pool_allocate; - decltype(hsa_amd_agents_allow_access)* hsa_amd_agents_allow_access; - decltype(hsa_amd_memory_async_copy)* hsa_amd_memory_async_copy; - decltype(hsa_amd_memory_async_copy_rect)* hsa_amd_memory_async_copy_rect; - - decltype(hsa_amd_signal_async_handler)* hsa_amd_signal_async_handler; - decltype(hsa_amd_profiling_set_profiler_enabled)* hsa_amd_profiling_set_profiler_enabled; - decltype(hsa_amd_profiling_get_async_copy_time)* hsa_amd_profiling_get_async_copy_time; - decltype(hsa_amd_profiling_get_dispatch_time)* hsa_amd_profiling_get_dispatch_time; -}; - -// Encapsulates information about a Hsa Agent such as its -// handle, name, max queue size, max wavefront size, etc. -struct AgentInfo { - // Handle of Agent - hsa_agent_t dev_id; - - // Agent type - Cpu = 0, Gpu = 1 or Dsp = 2 - uint32_t dev_type; - - // APU flag - bool is_apu; - - // Agent system index - uint32_t dev_index; - - // GFXIP name - char gfxip[64]; - - // Name of Agent whose length is less than 64 - char name[64]; - - // Max size of Wavefront size - uint32_t max_wave_size; - - // Max size of Queue buffer - uint32_t max_queue_size; - - // Hsail profile supported by agent - hsa_profile_t profile; - - // CPU/GPU/kern-arg memory pools - hsa_amd_memory_pool_t cpu_pool; - hsa_amd_memory_pool_t gpu_pool; - hsa_amd_memory_pool_t kern_arg_pool; - - // The number of compute unit available in the agent. - uint32_t cu_num; - - // Maximum number of waves possible in a Compute Unit. - uint32_t waves_per_cu; - - // Number of SIMD's per compute unit CU - uint32_t simds_per_cu; - - // Number of Shader Engines (SE) in Gpu - uint32_t se_num; - - // Number of Shader Arrays Per Shader Engines in Gpu - uint32_t shader_arrays_per_se; - - // SGPR/VGPR block sizes - uint32_t sgpr_block_dflt; - uint32_t sgpr_block_size; - uint32_t vgpr_block_size; -}; - -// HSA timer class -// Provides current HSA timestampa and system-clock/ns conversion API -class HsaTimer { - public: - typedef uint64_t timestamp_t; - static const timestamp_t TIMESTAMP_MAX = UINT64_MAX; - typedef long double freq_t; - - enum time_id_t { TIME_ID_CLOCK_REALTIME = 0, TIME_ID_CLOCK_MONOTONIC = 1, TIME_ID_NUMBER }; - - HsaTimer(const hsa_pfn_t* hsa_api) : hsa_api_(hsa_api) { - timestamp_t sysclock_hz = 0; - hsa_status_t status = - hsa_api_->hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz); - CHECK_STATUS("hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY)", status); - sysclock_factor_ = (freq_t)1000000000 / (freq_t)sysclock_hz; - } - - // Methods for system-clock/ns conversion - timestamp_t sysclock_to_ns(const timestamp_t& sysclock) const { - return timestamp_t((freq_t)sysclock * sysclock_factor_); - } - timestamp_t ns_to_sysclock(const timestamp_t& time) const { - return timestamp_t((freq_t)time / sysclock_factor_); - } - - // Method for timespec/ns conversion - static timestamp_t timespec_to_ns(const timespec& time) { - return ((timestamp_t)time.tv_sec * 1000000000) + time.tv_nsec; - } - - // Return timestamp in 'ns' - timestamp_t timestamp_ns() const { - timestamp_t sysclock; - hsa_status_t status = hsa_api_->hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, &sysclock); - CHECK_STATUS("hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP)", status); - return sysclock_to_ns(sysclock); - } - - // Return time in 'ns' - static timestamp_t clocktime_ns(clockid_t clock_id) { - timespec time; - clock_gettime(clock_id, &time); - return timespec_to_ns(time); - } - - // Return pair of correlated values of profiling timestamp and time with - // correlation error for a given time ID and number of iterations - void correlated_pair_ns(time_id_t time_id, uint32_t iters, timestamp_t* timestamp_v, - timestamp_t* time_v, timestamp_t* error_v) const { - clockid_t clock_id = 0; - switch (time_id) { - case TIME_ID_CLOCK_REALTIME: - clock_id = CLOCK_REALTIME; - break; - case TIME_ID_CLOCK_MONOTONIC: - clock_id = CLOCK_MONOTONIC; - break; - default: - CHECK_STATUS("internal error: invalid time_id", HSA_STATUS_ERROR); - } - - std::vector ts_vec(iters); - std::vector tm_vec(iters); - const uint32_t steps = iters - 1; - - for (uint32_t i = 0; i < iters; ++i) { - hsa_api_->hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, &ts_vec[i]); - clock_gettime(clock_id, &tm_vec[i]); - } - - const timestamp_t ts_base = sysclock_to_ns(ts_vec.front()); - const timestamp_t tm_base = timespec_to_ns(tm_vec.front()); - const timestamp_t error = (ts_vec.back() - ts_vec.front()) / (2 * steps); - - timestamp_t ts_accum = 0; - timestamp_t tm_accum = 0; - for (uint32_t i = 0; i < iters; ++i) { - ts_accum += (ts_vec[i] - ts_base); - tm_accum += (timespec_to_ns(tm_vec[i]) - tm_base); - } - - *timestamp_v = (ts_accum / iters) + ts_base + error; - *time_v = (tm_accum / iters) + tm_base; - *error_v = error; - } - - private: - // Timestamp frequency factor - freq_t sysclock_factor_; - // HSA API table - const hsa_pfn_t* const hsa_api_; -}; - -class HsaRsrcFactory { - public: - static const size_t CMD_SLOT_SIZE_B = 0x40; - typedef std::recursive_mutex mutex_t; - typedef HsaTimer::timestamp_t timestamp_t; - - static HsaRsrcFactory* Create(bool initialize_hsa = true) { - std::lock_guard lck(mutex_); - HsaRsrcFactory* obj = instance_.load(std::memory_order_relaxed); - if (obj == NULL) { - obj = new HsaRsrcFactory(initialize_hsa); - instance_.store(obj, std::memory_order_release); - } - return obj; - } - - static HsaRsrcFactory& Instance() { - HsaRsrcFactory* obj = instance_.load(std::memory_order_acquire); - if (obj == NULL) obj = Create(false); - hsa_status_t status = (obj != NULL) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR; - CHECK_STATUS("HsaRsrcFactory::Instance() failed", status); - return *obj; - } - - static void Destroy() { - std::lock_guard lck(mutex_); - if (instance_) delete instance_.load(); - instance_ = NULL; - } - - // Return system agent info - const AgentInfo* GetAgentInfo(const hsa_agent_t agent); - - // Get the count of Hsa Gpu Agents available on the platform - // @return uint32_t Number of Gpu agents on platform - uint32_t GetCountOfGpuAgents(); - - // Get the count of Hsa Cpu Agents available on the platform - // @return uint32_t Number of Cpu agents on platform - uint32_t GetCountOfCpuAgents(); - - // Get the AgentInfo handle of a Gpu device - // @param idx Gpu Agent at specified index - // @param agent_info Output parameter updated with AgentInfo - // @return bool true if successful, false otherwise - bool GetGpuAgentInfo(uint32_t idx, const AgentInfo** agent_info); - - // Get the AgentInfo handle of a Cpu device - // @param idx Cpu Agent at specified index - // @param agent_info Output parameter updated with AgentInfo - // @return bool true if successful, false otherwise - bool GetCpuAgentInfo(uint32_t idx, const AgentInfo** agent_info); - - // Create a Queue object and return its handle. The queue object is expected - // to support user requested number of Aql dispatch packets. - // @param agent_info Gpu Agent on which to create a queue object - // @param num_Pkts Number of packets to be held by queue - // @param queue Output parameter updated with handle of queue object - // @return bool true if successful, false otherwise - bool CreateQueue(const AgentInfo* agent_info, uint32_t num_pkts, hsa_queue_t** queue); - - // Create a Signal object and return its handle. - // @param value Initial value of signal object - // @param signal Output parameter updated with handle of signal object - // @return bool true if successful, false otherwise - bool CreateSignal(uint32_t value, hsa_signal_t* signal); - - // Allocate local GPU memory - // @param agent_info Agent from whose memory region to allocate - // @param size Size of memory in terms of bytes - // @return uint8_t* Pointer to buffer, null if allocation fails. - uint8_t* AllocateLocalMemory(const AgentInfo* agent_info, size_t size); - - // Allocate memory tp pass kernel parameters - // Memory is alocated accessible for all CPU agents and for GPU given by AgentInfo parameter. - // @param agent_info Agent from whose memory region to allocate - // @param size Size of memory in terms of bytes - // @return uint8_t* Pointer to buffer, null if allocation fails. - uint8_t* AllocateKernArgMemory(const AgentInfo* agent_info, size_t size); - - // Allocate system memory accessible from both CPU and GPU - // Memory is alocated accessible to all CPU agents and AgentInfo parameter is ignored. - // @param agent_info Agent from whose memory region to allocate - // @param size Size of memory in terms of bytes - // @return uint8_t* Pointer to buffer, null if allocation fails. - uint8_t* AllocateSysMemory(const AgentInfo* agent_info, size_t size); - - // Allocate memory for command buffer. - // @param agent_info Agent from whose memory region to allocate - // @param size Size of memory in terms of bytes - // @return uint8_t* Pointer to buffer, null if allocation fails. - uint8_t* AllocateCmdMemory(const AgentInfo* agent_info, size_t size); - - // Wait signal - hsa_signal_value_t SignalWait(const hsa_signal_t& signal, - const hsa_signal_value_t& signal_value) const; - - // Wait signal with signal value restore - void SignalWaitRestore(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const; - - // Copy data from GPU to host memory - bool Memcpy(const hsa_agent_t& agent, void* dst, const void* src, size_t size); - bool Memcpy(const AgentInfo* agent_info, void* dst, const void* src, size_t size); - - // Memory free method - static bool FreeMemory(void* ptr); - - // Loads an Assembled Brig file and Finalizes it into Device Isa - // @param agent_info Gpu device for which to finalize - // @param brig_path File path of the Assembled Brig file - // @param kernel_name Name of the kernel to finalize - // @param code_desc Handle of finalized Code Descriptor that could - // be used to submit for execution - // @return true if successful, false otherwise - bool LoadAndFinalize(const AgentInfo* agent_info, const char* brig_path, const char* kernel_name, - hsa_executable_t* hsa_exec, hsa_executable_symbol_t* code_desc); - - // Print the various fields of Hsa Gpu Agents - bool PrintGpuAgents(const std::string& header); - - // Submit AQL packet to given queue - static uint64_t Submit(hsa_queue_t* queue, const void* packet); - static uint64_t Submit(hsa_queue_t* queue, const void* packet, size_t size_bytes); - - // Enable executables loading tracking - static bool IsExecutableTracking() { return executable_tracking_on_; } - static void EnableExecutableTracking(HsaApiTable* table); - static const char* GetKernelName(uint64_t addr); - - // Initialize HSA API table - void static InitHsaApiTable(HsaApiTable* table); - static const hsa_pfn_t* HsaApi() { return &hsa_api_; } - - // Return AqlProfile API table - typedef hsa_ven_amd_aqlprofile_pfn_t aqlprofile_pfn_t; - const aqlprofile_pfn_t* AqlProfileApi() const { return &aqlprofile_api_; } - - // Return Loader API table - const hsa_ven_amd_loader_1_00_pfn_t* LoaderApi() const { return &loader_api_; } - - // Methods for system-clock/ns conversion and timestamp in 'ns' - timestamp_t SysclockToNs(const timestamp_t& sysclock) const { - return timer_->sysclock_to_ns(sysclock); - } - timestamp_t NsToSysclock(const timestamp_t& time) const { return timer_->ns_to_sysclock(time); } - timestamp_t TimestampNs() const { return timer_->timestamp_ns(); } - - timestamp_t GetSysTimeout() const { return timeout_; } - static timestamp_t GetTimeoutNs() { return timeout_ns_; } - static void SetTimeoutNs(const timestamp_t& time) { - std::lock_guard lck(mutex_); - timeout_ns_ = time; - if (instance_ != NULL) Instance().timeout_ = Instance().timer_->ns_to_sysclock(time); - } - - void CorrelateTime(HsaTimer::time_id_t time_id, uint32_t iters) { - timestamp_t timestamp_v = 0; - timestamp_t time_v = 0; - timestamp_t error_v = 0; - timer_->correlated_pair_ns(time_id, iters, ×tamp_v, &time_v, &error_v); - time_shift_[time_id] = time_v - timestamp_v; - time_error_[time_id] = error_v; - } - - hsa_status_t GetTime(uint32_t time_id, timestamp_t value, uint64_t* time) { - if (time_id >= HsaTimer::TIME_ID_NUMBER) return HSA_STATUS_ERROR; - *time = value + time_shift_[time_id]; - return HSA_STATUS_SUCCESS; - } - - hsa_status_t GetTimestamp(uint32_t time_id, uint64_t value, timestamp_t* timestamp) { - if (time_id >= HsaTimer::TIME_ID_NUMBER) return HSA_STATUS_ERROR; - *timestamp = value - time_shift_[time_id]; - return HSA_STATUS_SUCCESS; - } - - void DumpHandles(FILE* output_file); - - private: - // System agents iterating callback - static hsa_status_t GetHsaAgentsCallback(hsa_agent_t agent, void* data); - - // Callback function to find and bind kernarg region of an agent - static hsa_status_t FindMemRegionsCallback(hsa_region_t region, void* data); - - // Load AQL profile HSA extension library directly - static hsa_status_t LoadAqlProfileLib(aqlprofile_pfn_t* api); - - // Constructor of the class. Will initialize the Hsa Runtime and - // query the system topology to get the list of Cpu and Gpu devices - explicit HsaRsrcFactory(bool initialize_hsa); - - // Destructor of the class - ~HsaRsrcFactory(); - - // Add an instance of AgentInfo representing a Hsa Gpu agent - const AgentInfo* AddAgentInfo(const hsa_agent_t agent); - - // To mmap command buffer memory - static const bool CMD_MEMORY_MMAP = false; - - // HSA was initialized - const bool initialize_hsa_; - - static std::atomic instance_; - static mutex_t mutex_; - - // Used to maintain a list of Hsa Gpu Agent Info - std::vector gpu_list_; - std::vector gpu_agents_; - - // Used to maintain a list of Hsa Cpu Agent Info - std::vector cpu_list_; - std::vector cpu_agents_; - - // System agents map - std::map agent_map_; - - // Executables loading tracking - typedef std::map symbols_map_t; - static symbols_map_t* symbols_map_; - static bool executable_tracking_on_; - static hsa_status_t hsa_executable_freeze_interceptor(hsa_executable_t executable, - const char* options); - static hsa_status_t executable_symbols_cb(hsa_executable_t exec, hsa_executable_symbol_t symbol, - void* data); - - // HSA runtime API table - static hsa_pfn_t hsa_api_; - - // AqlProfile API table - aqlprofile_pfn_t aqlprofile_api_; - - // Loader API table - hsa_ven_amd_loader_1_00_pfn_t loader_api_; - - // System timeout, ns - static timestamp_t timeout_ns_; - // System timeout, sysclock - timestamp_t timeout_; - - // HSA timer - HsaTimer* timer_; - - // Time shift array to support time conversion - timestamp_t time_shift_[HsaTimer::TIME_ID_NUMBER]; - timestamp_t time_error_[HsaTimer::TIME_ID_NUMBER]; - - // CPU/kern-arg memory pools - hsa_amd_memory_pool_t* cpu_pool_; - hsa_amd_memory_pool_t* kern_arg_pool_; -}; - -} // namespace util - -#endif // SRC_UTIL_HSA_RSRC_FACTORY_H_