Remove now unused hsa_rsrc_factory
Change-Id: I66175eb9fae2e7e61400af77a0c89be9c39e770e
Этот коммит содержится в:
@@ -40,7 +40,6 @@ execute_process ( COMMAND sh -xc "ln -s ${ROOT_DIR}/../rocprofiler/src/core/acti
|
||||
set ( TARGET_LIB ${TARGET_NAME} )
|
||||
set ( LIB_SRC
|
||||
${LIB_DIR}/core/roctracer.cpp
|
||||
${LIB_DIR}/util/hsa_rsrc_factory.cpp
|
||||
)
|
||||
add_library ( ${TARGET_LIB} ${LIBRARY_TYPE} ${LIB_SRC} )
|
||||
target_include_directories ( ${TARGET_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${ROCM_INC_PATH} ${HIP_INC_DIR} ${HSA_KMT_INC_PATH} ${GEN_INC_DIR} )
|
||||
|
||||
@@ -48,6 +48,16 @@
|
||||
#define CONSTRUCTOR_API __attribute__((constructor))
|
||||
#define DESTRUCTOR_API __attribute__((destructor))
|
||||
|
||||
// Aborts the process when an HSA status value is not HSA_STATUS_SUCCESS.
//   msg    - text placed in front of the error description handed to ERR_LOGGING
//   status - expression yielding the status to check; it is evaluated exactly once, so a
//            call expression can be passed directly without being re-executed on failure
// The error description comes from hsa_status_string(); "<unknown error>" is logged when
// that call leaves the string pointer null.
#define CHECK_STATUS(msg, status)                                                        \
  do {                                                                                   \
    const auto check_status_value_ = (status);                                           \
    if (check_status_value_ != HSA_STATUS_SUCCESS) {                                     \
      const char* status_string = nullptr;                                               \
      hsa_status_string(check_status_value_, &status_string);                            \
      ERR_LOGGING(msg << ": " << (status_string ? status_string : "<unknown error>"));   \
      abort();                                                                           \
    }                                                                                    \
  } while (false)
|
||||
|
||||
#define HIPAPI_CALL(call) \
|
||||
do { \
|
||||
hipError_t err = call; \
|
||||
|
||||
@@ -28,7 +28,6 @@
|
||||
|
||||
#include <atomic>
|
||||
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
#include "util/exception.h"
|
||||
#include "util/logger.h"
|
||||
|
||||
@@ -114,14 +113,7 @@ class Tracker {
|
||||
entry->begin = async_copy_time.start * sysclock_period;
|
||||
entry->end = async_copy_time.end * sysclock_period;
|
||||
} else {
|
||||
hsa_amd_profiling_dispatch_time_t dispatch_time{};
|
||||
hsa_status_t status =
|
||||
hsa_amd_profiling_get_dispatch_time(entry->agent, entry->signal, &dispatch_time);
|
||||
if (status != HSA_STATUS_SUCCESS)
|
||||
EXC_RAISING(ROCTRACER_STATUS_ERROR, "hsa_amd_profiling_get_dispatch_time failed");
|
||||
entry->begin = dispatch_time.start * sysclock_period;
|
||||
entry->end = dispatch_time.end * sysclock_period;
|
||||
entry->dev_index = ::util::HsaRsrcFactory::Instance().GetAgentInfo(entry->agent)->dev_index;
|
||||
assert(false && "should not reach here");
|
||||
}
|
||||
|
||||
hsa_signal_t orig = entry->orig;
|
||||
|
||||
@@ -1,807 +0,0 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
|
||||
#include <dlfcn.h>
#include <fcntl.h>
#include <hsa.h>
#include <hsa_ext_amd.h>
#include <hsa_ext_finalize.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace util {
|
||||
|
||||
// Callback function to get available in the system agents
|
||||
hsa_status_t HsaRsrcFactory::GetHsaAgentsCallback(hsa_agent_t agent, void* data) {
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
HsaRsrcFactory* hsa_rsrc = reinterpret_cast<HsaRsrcFactory*>(data);
|
||||
const AgentInfo* agent_info = hsa_rsrc->AddAgentInfo(agent);
|
||||
if (agent_info != NULL) status = HSA_STATUS_SUCCESS;
|
||||
return status;
|
||||
}
|
||||
|
||||
// This function checks to see if the provided
|
||||
// pool has the HSA_AMD_SEGMENT_GLOBAL property. If the kern_arg flag is true,
|
||||
// the function adds an additional requirement that the pool have the
|
||||
// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT property. If kern_arg is false,
|
||||
// pools must NOT have this property.
|
||||
// Upon finding a pool that meets these conditions, HSA_STATUS_INFO_BREAK is
|
||||
// returned. HSA_STATUS_SUCCESS is returned if no errors were encountered, but
|
||||
// no pool was found meeting the requirements. If an error is encountered, we
|
||||
// return that error.
|
||||
static hsa_status_t FindGlobalPool(hsa_amd_memory_pool_t pool, void* data, bool kern_arg) {
|
||||
hsa_status_t err;
|
||||
hsa_amd_segment_t segment;
|
||||
uint32_t flag;
|
||||
|
||||
if (nullptr == data) {
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
err = HsaRsrcFactory::HsaApi()->hsa_amd_memory_pool_get_info(
|
||||
pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment);
|
||||
CHECK_STATUS("hsa_amd_memory_pool_get_info", err);
|
||||
if (HSA_AMD_SEGMENT_GLOBAL != segment) {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
err = HsaRsrcFactory::HsaApi()->hsa_amd_memory_pool_get_info(
|
||||
pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag);
|
||||
CHECK_STATUS("hsa_amd_memory_pool_get_info", err);
|
||||
|
||||
uint32_t karg_st = flag & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT;
|
||||
|
||||
if ((karg_st == 0 && kern_arg) || (karg_st != 0 && !kern_arg)) {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
*(reinterpret_cast<hsa_amd_memory_pool_t*>(data)) = pool;
|
||||
return HSA_STATUS_INFO_BREAK;
|
||||
}
|
||||
|
||||
// This is the call-back function for hsa_amd_agent_iterate_memory_pools() that
|
||||
// finds a pool with the properties of HSA_AMD_SEGMENT_GLOBAL and that is NOT
|
||||
// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT
|
||||
hsa_status_t FindStandardPool(hsa_amd_memory_pool_t pool, void* data) {
|
||||
return FindGlobalPool(pool, data, false);
|
||||
}
|
||||
|
||||
// This is the call-back function for hsa_amd_agent_iterate_memory_pools() that
|
||||
// finds a pool with the properties of HSA_AMD_SEGMENT_GLOBAL and that IS
|
||||
// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT
|
||||
hsa_status_t FindKernArgPool(hsa_amd_memory_pool_t pool, void* data) {
|
||||
return FindGlobalPool(pool, data, true);
|
||||
}
|
||||
|
||||
// Constructor of the class
|
||||
HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize_hsa) {
|
||||
hsa_status_t status;
|
||||
|
||||
cpu_pool_ = NULL;
|
||||
kern_arg_pool_ = NULL;
|
||||
|
||||
InitHsaApiTable(NULL);
|
||||
|
||||
// Initialize the Hsa Runtime
|
||||
if (initialize_hsa_) {
|
||||
status = hsa_api_.hsa_init();
|
||||
CHECK_STATUS("Error in hsa_init", status);
|
||||
}
|
||||
|
||||
// Discover the set of Gpu devices available on the platform
|
||||
status = hsa_api_.hsa_iterate_agents(GetHsaAgentsCallback, this);
|
||||
CHECK_STATUS("Error Calling hsa_iterate_agents", status);
|
||||
if (cpu_pool_ == NULL) CHECK_STATUS("CPU memory pool is not found", HSA_STATUS_ERROR);
|
||||
if (kern_arg_pool_ == NULL) CHECK_STATUS("Kern-arg memory pool is not found", HSA_STATUS_ERROR);
|
||||
|
||||
// Get AqlProfile API table
|
||||
aqlprofile_api_ = {0};
|
||||
#ifdef ROCP_LD_AQLPROFILE
|
||||
status = LoadAqlProfileLib(&aqlprofile_api_);
|
||||
#else
|
||||
status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_AQLPROFILE,
|
||||
hsa_ven_amd_aqlprofile_VERSION_MAJOR,
|
||||
sizeof(aqlprofile_api_), &aqlprofile_api_);
|
||||
#endif
|
||||
CHECK_STATUS("aqlprofile API table load failed", status);
|
||||
|
||||
// Get Loader API table
|
||||
loader_api_ = {0};
|
||||
status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_LOADER, 1,
|
||||
sizeof(loader_api_), &loader_api_);
|
||||
CHECK_STATUS("loader API table query failed", status);
|
||||
|
||||
// Instantiate HSA timer
|
||||
timer_ = new HsaTimer(&hsa_api_);
|
||||
CHECK_STATUS("HSA timer allocation failed",
|
||||
(timer_ == NULL) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS);
|
||||
|
||||
// Time correlation
|
||||
const uint32_t corr_iters = 1000;
|
||||
CorrelateTime(HsaTimer::TIME_ID_CLOCK_REALTIME, corr_iters);
|
||||
CorrelateTime(HsaTimer::TIME_ID_CLOCK_MONOTONIC, corr_iters);
|
||||
|
||||
// System timeout
|
||||
timeout_ =
|
||||
(timeout_ns_ == HsaTimer::TIMESTAMP_MAX) ? timeout_ns_ : timer_->ns_to_sysclock(timeout_ns_);
|
||||
}
|
||||
|
||||
// Destructor of the class
|
||||
HsaRsrcFactory::~HsaRsrcFactory() {
|
||||
delete timer_;
|
||||
for (auto p : cpu_list_) delete p;
|
||||
for (auto p : gpu_list_) delete p;
|
||||
if (initialize_hsa_) {
|
||||
hsa_status_t status = hsa_api_.hsa_shut_down();
|
||||
CHECK_STATUS("Error in hsa_shut_down", status);
|
||||
}
|
||||
}
|
||||
|
||||
// Fills hsa_api_ with the HSA entry points used by the factory, once.
// When 'table' is non-null the function pointers are taken from its core_ and amd_ext_
// tables; otherwise the HSA functions visible at link time are used. The work is done under
// mutex_, and nothing happens if hsa_api_.hsa_init is already set.
void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) {
  std::lock_guard<mutex_t> lck(mutex_);

  // Already populated by an earlier call
  if (hsa_api_.hsa_init != NULL) return;

  if (table == NULL) {
    // Core runtime
    hsa_api_.hsa_init = hsa_init;
    hsa_api_.hsa_shut_down = hsa_shut_down;
    hsa_api_.hsa_agent_get_info = hsa_agent_get_info;
    hsa_api_.hsa_iterate_agents = hsa_iterate_agents;

    // Queues
    hsa_api_.hsa_queue_create = hsa_queue_create;
    hsa_api_.hsa_queue_destroy = hsa_queue_destroy;
    hsa_api_.hsa_queue_load_write_index_relaxed = hsa_queue_load_write_index_relaxed;
    hsa_api_.hsa_queue_store_write_index_relaxed = hsa_queue_store_write_index_relaxed;
    hsa_api_.hsa_queue_load_read_index_relaxed = hsa_queue_load_read_index_relaxed;

    // Signals
    hsa_api_.hsa_signal_create = hsa_signal_create;
    hsa_api_.hsa_signal_destroy = hsa_signal_destroy;
    hsa_api_.hsa_signal_load_relaxed = hsa_signal_load_relaxed;
    hsa_api_.hsa_signal_store_relaxed = hsa_signal_store_relaxed;
    hsa_api_.hsa_signal_wait_scacquire = hsa_signal_wait_scacquire;
    hsa_api_.hsa_signal_store_screlease = hsa_signal_store_screlease;

    // Code objects and executables
    hsa_api_.hsa_code_object_reader_create_from_file = hsa_code_object_reader_create_from_file;
    hsa_api_.hsa_executable_create_alt = hsa_executable_create_alt;
    hsa_api_.hsa_executable_load_agent_code_object = hsa_executable_load_agent_code_object;
    hsa_api_.hsa_executable_freeze = hsa_executable_freeze;
    hsa_api_.hsa_executable_get_symbol = hsa_executable_get_symbol;
    hsa_api_.hsa_executable_symbol_get_info = hsa_executable_symbol_get_info;
    hsa_api_.hsa_executable_iterate_symbols = hsa_executable_iterate_symbols;

    // System queries
    hsa_api_.hsa_system_get_info = hsa_system_get_info;
    hsa_api_.hsa_system_get_major_extension_table = hsa_system_get_major_extension_table;

    // AMD extension: memory pools and copies
    hsa_api_.hsa_amd_agent_iterate_memory_pools = hsa_amd_agent_iterate_memory_pools;
    hsa_api_.hsa_amd_memory_pool_get_info = hsa_amd_memory_pool_get_info;
    hsa_api_.hsa_amd_memory_pool_allocate = hsa_amd_memory_pool_allocate;
    hsa_api_.hsa_amd_agents_allow_access = hsa_amd_agents_allow_access;
    hsa_api_.hsa_amd_memory_async_copy = hsa_amd_memory_async_copy;
    hsa_api_.hsa_amd_memory_async_copy_rect = hsa_amd_memory_async_copy_rect;

    // AMD extension: signal handlers and profiling
    hsa_api_.hsa_amd_signal_async_handler = hsa_amd_signal_async_handler;
    hsa_api_.hsa_amd_profiling_set_profiler_enabled = hsa_amd_profiling_set_profiler_enabled;
    hsa_api_.hsa_amd_profiling_get_async_copy_time = hsa_amd_profiling_get_async_copy_time;
    hsa_api_.hsa_amd_profiling_get_dispatch_time = hsa_amd_profiling_get_dispatch_time;
    return;
  }

  // Core runtime
  hsa_api_.hsa_init = table->core_->hsa_init_fn;
  hsa_api_.hsa_shut_down = table->core_->hsa_shut_down_fn;
  hsa_api_.hsa_agent_get_info = table->core_->hsa_agent_get_info_fn;
  hsa_api_.hsa_iterate_agents = table->core_->hsa_iterate_agents_fn;

  // Queues
  hsa_api_.hsa_queue_create = table->core_->hsa_queue_create_fn;
  hsa_api_.hsa_queue_destroy = table->core_->hsa_queue_destroy_fn;
  hsa_api_.hsa_queue_load_write_index_relaxed =
      table->core_->hsa_queue_load_write_index_relaxed_fn;
  hsa_api_.hsa_queue_store_write_index_relaxed =
      table->core_->hsa_queue_store_write_index_relaxed_fn;
  hsa_api_.hsa_queue_load_read_index_relaxed = table->core_->hsa_queue_load_read_index_relaxed_fn;

  // Signals
  hsa_api_.hsa_signal_create = table->core_->hsa_signal_create_fn;
  hsa_api_.hsa_signal_destroy = table->core_->hsa_signal_destroy_fn;
  hsa_api_.hsa_signal_load_relaxed = table->core_->hsa_signal_load_relaxed_fn;
  hsa_api_.hsa_signal_store_relaxed = table->core_->hsa_signal_store_relaxed_fn;
  hsa_api_.hsa_signal_wait_scacquire = table->core_->hsa_signal_wait_scacquire_fn;
  hsa_api_.hsa_signal_store_screlease = table->core_->hsa_signal_store_screlease_fn;

  // Code objects and executables
  hsa_api_.hsa_code_object_reader_create_from_file =
      table->core_->hsa_code_object_reader_create_from_file_fn;
  hsa_api_.hsa_executable_create_alt = table->core_->hsa_executable_create_alt_fn;
  hsa_api_.hsa_executable_load_agent_code_object =
      table->core_->hsa_executable_load_agent_code_object_fn;
  hsa_api_.hsa_executable_freeze = table->core_->hsa_executable_freeze_fn;
  hsa_api_.hsa_executable_get_symbol = table->core_->hsa_executable_get_symbol_fn;
  hsa_api_.hsa_executable_symbol_get_info = table->core_->hsa_executable_symbol_get_info_fn;
  hsa_api_.hsa_executable_iterate_symbols = table->core_->hsa_executable_iterate_symbols_fn;

  // System queries
  hsa_api_.hsa_system_get_info = table->core_->hsa_system_get_info_fn;
  hsa_api_.hsa_system_get_major_extension_table =
      table->core_->hsa_system_get_major_extension_table_fn;

  // AMD extension: memory pools and copies
  hsa_api_.hsa_amd_agent_iterate_memory_pools =
      table->amd_ext_->hsa_amd_agent_iterate_memory_pools_fn;
  hsa_api_.hsa_amd_memory_pool_get_info = table->amd_ext_->hsa_amd_memory_pool_get_info_fn;
  hsa_api_.hsa_amd_memory_pool_allocate = table->amd_ext_->hsa_amd_memory_pool_allocate_fn;
  hsa_api_.hsa_amd_agents_allow_access = table->amd_ext_->hsa_amd_agents_allow_access_fn;
  hsa_api_.hsa_amd_memory_async_copy = table->amd_ext_->hsa_amd_memory_async_copy_fn;
  hsa_api_.hsa_amd_memory_async_copy_rect = table->amd_ext_->hsa_amd_memory_async_copy_rect_fn;

  // AMD extension: signal handlers and profiling
  hsa_api_.hsa_amd_signal_async_handler = table->amd_ext_->hsa_amd_signal_async_handler_fn;
  hsa_api_.hsa_amd_profiling_set_profiler_enabled =
      table->amd_ext_->hsa_amd_profiling_set_profiler_enabled_fn;
  hsa_api_.hsa_amd_profiling_get_async_copy_time =
      table->amd_ext_->hsa_amd_profiling_get_async_copy_time_fn;
  hsa_api_.hsa_amd_profiling_get_dispatch_time =
      table->amd_ext_->hsa_amd_profiling_get_dispatch_time_fn;
}
|
||||
|
||||
// Opens the aqlprofile library (kAqlProfileLib) with dlopen() and stores the addresses of
// its entry points in 'api'.
// Returns HSA_STATUS_ERROR (after printing the dlerror() text to stderr) when the library
// cannot be opened, HSA_STATUS_SUCCESS otherwise.
// NOTE(review): the dlsym() results are stored without a null check, so a symbol missing
// from the library leaves a null pointer in the table - confirm callers tolerate that.
hsa_status_t HsaRsrcFactory::LoadAqlProfileLib(aqlprofile_pfn_t* api) {
  void* lib = dlopen(kAqlProfileLib, RTLD_NOW);
  if (lib == NULL) {
    fprintf(stderr, "Loading '%s' failed, %s\n", kAqlProfileLib, dlerror());
    return HSA_STATUS_ERROR;
  }
  dlerror(); /* Clear any existing error */

  api->hsa_ven_amd_aqlprofile_error_string =
      reinterpret_cast<decltype(::hsa_ven_amd_aqlprofile_error_string)*>(
          dlsym(lib, "hsa_ven_amd_aqlprofile_error_string"));
  api->hsa_ven_amd_aqlprofile_validate_event =
      reinterpret_cast<decltype(::hsa_ven_amd_aqlprofile_validate_event)*>(
          dlsym(lib, "hsa_ven_amd_aqlprofile_validate_event"));
  api->hsa_ven_amd_aqlprofile_start = reinterpret_cast<decltype(::hsa_ven_amd_aqlprofile_start)*>(
      dlsym(lib, "hsa_ven_amd_aqlprofile_start"));
  api->hsa_ven_amd_aqlprofile_stop = reinterpret_cast<decltype(::hsa_ven_amd_aqlprofile_stop)*>(
      dlsym(lib, "hsa_ven_amd_aqlprofile_stop"));
#ifdef AQLPROF_NEW_API
  api->hsa_ven_amd_aqlprofile_read = reinterpret_cast<decltype(::hsa_ven_amd_aqlprofile_read)*>(
      dlsym(lib, "hsa_ven_amd_aqlprofile_read"));
#endif
  api->hsa_ven_amd_aqlprofile_legacy_get_pm4 =
      reinterpret_cast<decltype(::hsa_ven_amd_aqlprofile_legacy_get_pm4)*>(
          dlsym(lib, "hsa_ven_amd_aqlprofile_legacy_get_pm4"));
  api->hsa_ven_amd_aqlprofile_get_info =
      reinterpret_cast<decltype(::hsa_ven_amd_aqlprofile_get_info)*>(
          dlsym(lib, "hsa_ven_amd_aqlprofile_get_info"));
  api->hsa_ven_amd_aqlprofile_iterate_data =
      reinterpret_cast<decltype(::hsa_ven_amd_aqlprofile_iterate_data)*>(
          dlsym(lib, "hsa_ven_amd_aqlprofile_iterate_data"));

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Add system agent info
|
||||
const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) {
|
||||
// Determine if device is a Gpu agent
|
||||
hsa_status_t status;
|
||||
AgentInfo* agent_info = NULL;
|
||||
|
||||
hsa_device_type_t type;
|
||||
status = hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type);
|
||||
CHECK_STATUS("Error Calling hsa_agent_get_info", status);
|
||||
|
||||
if (type == HSA_DEVICE_TYPE_CPU) {
|
||||
agent_info = new AgentInfo{};
|
||||
agent_info->dev_id = agent;
|
||||
agent_info->dev_type = HSA_DEVICE_TYPE_CPU;
|
||||
agent_info->dev_index = cpu_list_.size();
|
||||
|
||||
status =
|
||||
hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->cpu_pool);
|
||||
if ((status == HSA_STATUS_INFO_BREAK) && (cpu_pool_ == NULL)) cpu_pool_ = &agent_info->cpu_pool;
|
||||
status = hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindKernArgPool,
|
||||
&agent_info->kern_arg_pool);
|
||||
if ((status == HSA_STATUS_INFO_BREAK) && (kern_arg_pool_ == NULL))
|
||||
kern_arg_pool_ = &agent_info->kern_arg_pool;
|
||||
agent_info->gpu_pool = {};
|
||||
|
||||
cpu_list_.push_back(agent_info);
|
||||
cpu_agents_.push_back(agent);
|
||||
}
|
||||
|
||||
if (type == HSA_DEVICE_TYPE_GPU) {
|
||||
agent_info = new AgentInfo{};
|
||||
agent_info->dev_id = agent;
|
||||
agent_info->dev_type = HSA_DEVICE_TYPE_GPU;
|
||||
hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_info->name);
|
||||
const int gfxip_label_len =
|
||||
std::min(strlen(agent_info->name) - 2, sizeof(agent_info->gfxip) - 1);
|
||||
memcpy(agent_info->gfxip, agent_info->name, gfxip_label_len);
|
||||
agent_info->gfxip[gfxip_label_len] = '\0';
|
||||
hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &agent_info->max_wave_size);
|
||||
hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &agent_info->max_queue_size);
|
||||
hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_info->profile);
|
||||
agent_info->is_apu = (agent_info->profile == HSA_PROFILE_FULL) ? true : false;
|
||||
hsa_api_.hsa_agent_get_info(
|
||||
agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT),
|
||||
&agent_info->cu_num);
|
||||
hsa_api_.hsa_agent_get_info(agent,
|
||||
static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU),
|
||||
&agent_info->waves_per_cu);
|
||||
hsa_api_.hsa_agent_get_info(agent,
|
||||
static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU),
|
||||
&agent_info->simds_per_cu);
|
||||
hsa_api_.hsa_agent_get_info(
|
||||
agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES),
|
||||
&agent_info->se_num);
|
||||
hsa_api_.hsa_agent_get_info(
|
||||
agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE),
|
||||
&agent_info->shader_arrays_per_se);
|
||||
|
||||
agent_info->cpu_pool = {};
|
||||
agent_info->kern_arg_pool = {};
|
||||
status =
|
||||
hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->gpu_pool);
|
||||
CHECK_ITER_STATUS("hsa_amd_agent_iterate_memory_pools(gpu pool)", status);
|
||||
|
||||
// GFX8 and GFX9 SGPR/VGPR block sizes
|
||||
agent_info->sgpr_block_dflt = (strcmp(agent_info->gfxip, "gfx8") == 0) ? 1 : 2;
|
||||
agent_info->sgpr_block_size = 8;
|
||||
agent_info->vgpr_block_size = 4;
|
||||
|
||||
// Set GPU index
|
||||
agent_info->dev_index = gpu_list_.size();
|
||||
gpu_list_.push_back(agent_info);
|
||||
gpu_agents_.push_back(agent);
|
||||
}
|
||||
|
||||
if (agent_info) agent_map_[agent.handle] = agent_info;
|
||||
|
||||
return agent_info;
|
||||
}
|
||||
|
||||
// Return systen agent info
|
||||
const AgentInfo* HsaRsrcFactory::GetAgentInfo(const hsa_agent_t agent) {
|
||||
const AgentInfo* agent_info = NULL;
|
||||
auto it = agent_map_.find(agent.handle);
|
||||
if (it != agent_map_.end()) {
|
||||
agent_info = it->second;
|
||||
}
|
||||
return agent_info;
|
||||
}
|
||||
|
||||
// Get the count of Hsa Gpu Agents available on the platform
|
||||
//
|
||||
// @return uint32_t Number of Gpu agents on platform
|
||||
//
|
||||
uint32_t HsaRsrcFactory::GetCountOfGpuAgents() { return uint32_t(gpu_list_.size()); }
|
||||
|
||||
// Get the count of Hsa Cpu Agents available on the platform
|
||||
//
|
||||
// @return uint32_t Number of Cpu agents on platform
|
||||
//
|
||||
uint32_t HsaRsrcFactory::GetCountOfCpuAgents() { return uint32_t(cpu_list_.size()); }
|
||||
|
||||
// Get the AgentInfo handle of a Gpu device
|
||||
//
|
||||
// @param idx Gpu Agent at specified index
|
||||
//
|
||||
// @param agent_info Output parameter updated with AgentInfo
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool HsaRsrcFactory::GetGpuAgentInfo(uint32_t idx, const AgentInfo** agent_info) {
|
||||
// Determine if request is valid
|
||||
uint32_t size = uint32_t(gpu_list_.size());
|
||||
if (idx >= size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Copy AgentInfo from specified index
|
||||
*agent_info = gpu_list_[idx];
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Get the AgentInfo handle of a Cpu device
|
||||
//
|
||||
// @param idx Cpu Agent at specified index
|
||||
//
|
||||
// @param agent_info Output parameter updated with AgentInfo
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool HsaRsrcFactory::GetCpuAgentInfo(uint32_t idx, const AgentInfo** agent_info) {
|
||||
// Determine if request is valid
|
||||
uint32_t size = uint32_t(cpu_list_.size());
|
||||
if (idx >= size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Copy AgentInfo from specified index
|
||||
*agent_info = cpu_list_[idx];
|
||||
return true;
|
||||
}
|
||||
|
||||
// Create a Queue object and return its handle. The queue object is expected
|
||||
// to support user requested number of Aql dispatch packets.
|
||||
//
|
||||
// @param agent_info Gpu Agent on which to create a queue object
|
||||
//
|
||||
// @param num_Pkts Number of packets to be held by queue
|
||||
//
|
||||
// @param queue Output parameter updated with handle of queue object
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool HsaRsrcFactory::CreateQueue(const AgentInfo* agent_info, uint32_t num_pkts,
|
||||
hsa_queue_t** queue) {
|
||||
hsa_status_t status;
|
||||
status = hsa_api_.hsa_queue_create(agent_info->dev_id, num_pkts, HSA_QUEUE_TYPE_MULTI, NULL, NULL,
|
||||
UINT32_MAX, UINT32_MAX, queue);
|
||||
return (status == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
// Create a Signal object and return its handle.
|
||||
// @param value Initial value of signal object
|
||||
// @param signal Output parameter updated with handle of signal object
|
||||
// @return bool true if successful, false otherwise
|
||||
bool HsaRsrcFactory::CreateSignal(uint32_t value, hsa_signal_t* signal) {
|
||||
hsa_status_t status;
|
||||
status = hsa_api_.hsa_signal_create(value, 0, NULL, signal);
|
||||
return (status == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
// Allocate memory for use by a kernel of specified size in specified
|
||||
// agent's memory region.
|
||||
// @param agent_info Agent from whose memory region to allocate
|
||||
// @param size Size of memory in terms of bytes
|
||||
// @return uint8_t* Pointer to buffer, null if allocation fails.
|
||||
uint8_t* HsaRsrcFactory::AllocateLocalMemory(const AgentInfo* agent_info, size_t size) {
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
uint8_t* buffer = NULL;
|
||||
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
|
||||
status = hsa_api_.hsa_amd_memory_pool_allocate(agent_info->gpu_pool, size, 0,
|
||||
reinterpret_cast<void**>(&buffer));
|
||||
uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
// Allocate memory to pass kernel parameters.
|
||||
// Memory is alocated accessible for all CPU agents and for GPU given by AgentInfo parameter.
|
||||
// @param agent_info Agent from whose memory region to allocate
|
||||
// @param size Size of memory in terms of bytes
|
||||
// @return uint8_t* Pointer to buffer, null if allocation fails.
|
||||
uint8_t* HsaRsrcFactory::AllocateKernArgMemory(const AgentInfo* agent_info, size_t size) {
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
uint8_t* buffer = NULL;
|
||||
if (!cpu_agents_.empty()) {
|
||||
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
|
||||
status = hsa_api_.hsa_amd_memory_pool_allocate(*kern_arg_pool_, size, 0,
|
||||
reinterpret_cast<void**>(&buffer));
|
||||
// Both the CPU and GPU can access the kernel arguments
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
hsa_agent_t ag_list[1] = {agent_info->dev_id};
|
||||
status = hsa_api_.hsa_amd_agents_allow_access(1, ag_list, NULL, buffer);
|
||||
}
|
||||
}
|
||||
uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
// Allocate system memory accessible by both CPU and GPU
|
||||
// @param agent_info Agent from whose memory region to allocate
|
||||
// @param size Size of memory in terms of bytes
|
||||
// @return uint8_t* Pointer to buffer, null if allocation fails.
|
||||
uint8_t* HsaRsrcFactory::AllocateSysMemory(const AgentInfo* agent_info, size_t size) {
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
uint8_t* buffer = NULL;
|
||||
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
|
||||
if (!cpu_agents_.empty()) {
|
||||
status = hsa_api_.hsa_amd_memory_pool_allocate(*cpu_pool_, size, 0,
|
||||
reinterpret_cast<void**>(&buffer));
|
||||
// Both the CPU and GPU can access the memory
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
hsa_agent_t ag_list[1] = {agent_info->dev_id};
|
||||
status = hsa_api_.hsa_amd_agents_allow_access(1, ag_list, NULL, buffer);
|
||||
}
|
||||
}
|
||||
uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
// Allocate memory for command buffer.
|
||||
// @param agent_info Agent from whose memory region to allocate
|
||||
// @param size Size of memory in terms of bytes
|
||||
// @return uint8_t* Pointer to buffer, null if allocation fails.
|
||||
uint8_t* HsaRsrcFactory::AllocateCmdMemory(const AgentInfo* agent_info, size_t size) {
|
||||
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
|
||||
uint8_t* ptr = (agent_info->is_apu && CMD_MEMORY_MMAP)
|
||||
? reinterpret_cast<uint8_t*>(
|
||||
mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED | MAP_ANONYMOUS, 0, 0))
|
||||
: AllocateSysMemory(agent_info, size);
|
||||
return ptr;
|
||||
}
|
||||
|
||||
// Wait signal
|
||||
hsa_signal_value_t HsaRsrcFactory::SignalWait(const hsa_signal_t& signal,
|
||||
const hsa_signal_value_t& signal_value) const {
|
||||
const hsa_signal_value_t exp_value = signal_value - 1;
|
||||
hsa_signal_value_t ret_value = signal_value;
|
||||
while (1) {
|
||||
ret_value = hsa_api_.hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, signal_value,
|
||||
timeout_, HSA_WAIT_STATE_BLOCKED);
|
||||
if (ret_value == exp_value) break;
|
||||
if (ret_value != signal_value) {
|
||||
std::cerr << "Error: HsaRsrcFactory::SignalWait: signal_value(" << signal_value
|
||||
<< "), ret_value(" << ret_value << ")" << std::endl
|
||||
<< std::flush;
|
||||
abort();
|
||||
}
|
||||
}
|
||||
return ret_value;
|
||||
}
|
||||
|
||||
// Wait signal with signal value restore
|
||||
void HsaRsrcFactory::SignalWaitRestore(const hsa_signal_t& signal,
|
||||
const hsa_signal_value_t& signal_value) const {
|
||||
SignalWait(signal, signal_value);
|
||||
hsa_api_.hsa_signal_store_relaxed(const_cast<hsa_signal_t&>(signal), signal_value);
|
||||
}
|
||||
|
||||
// Copy data from GPU to host memory
|
||||
bool HsaRsrcFactory::Memcpy(const hsa_agent_t& agent, void* dst, const void* src, size_t size) {
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
if (!cpu_agents_.empty()) {
|
||||
hsa_signal_t s = {};
|
||||
status = hsa_api_.hsa_signal_create(1, 0, NULL, &s);
|
||||
CHECK_STATUS("hsa_signal_create()", status);
|
||||
status = hsa_api_.hsa_amd_memory_async_copy(dst, cpu_agents_[0], src, agent, size, 0, NULL, s);
|
||||
CHECK_STATUS("hsa_amd_memory_async_copy()", status);
|
||||
SignalWait(s, 1);
|
||||
status = hsa_api_.hsa_signal_destroy(s);
|
||||
CHECK_STATUS("hsa_signal_destroy()", status);
|
||||
}
|
||||
return (status == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
bool HsaRsrcFactory::Memcpy(const AgentInfo* agent_info, void* dst, const void* src, size_t size) {
|
||||
return Memcpy(agent_info->dev_id, dst, src, size);
|
||||
}
|
||||
|
||||
// Memory free method
|
||||
bool HsaRsrcFactory::FreeMemory(void* ptr) {
|
||||
const hsa_status_t status = hsa_memory_free(ptr);
|
||||
CHECK_STATUS("hsa_memory_free", status);
|
||||
return (status == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
// Loads an Assembled Brig file and Finalizes it into Device Isa
|
||||
// @param agent_info Gpu device for which to finalize
|
||||
// @param brig_path File path of the Assembled Brig file
|
||||
// @param kernel_name Name of the kernel to finalize
|
||||
// @param code_desc Handle of finalized Code Descriptor that could
|
||||
// be used to submit for execution
|
||||
// @return bool true if successful, false otherwise
|
||||
bool HsaRsrcFactory::LoadAndFinalize(const AgentInfo* agent_info, const char* brig_path,
|
||||
const char* kernel_name, hsa_executable_t* executable,
|
||||
hsa_executable_symbol_t* code_desc) {
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
|
||||
// Build the code object filename
|
||||
std::string filename(brig_path);
|
||||
std::clog << "Code object filename: " << filename << std::endl;
|
||||
|
||||
// Open the file containing code object
|
||||
hsa_file_t file_handle = open(filename.c_str(), O_RDONLY);
|
||||
if (file_handle == -1) {
|
||||
std::cerr << "Error: failed to load '" << filename << "'" << std::endl;
|
||||
assert(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Create code object reader
|
||||
hsa_code_object_reader_t code_obj_rdr = {0};
|
||||
status = hsa_api_.hsa_code_object_reader_create_from_file(file_handle, &code_obj_rdr);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
std::cerr << "Failed to create code object reader '" << filename << "'" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Create executable.
|
||||
status = hsa_api_.hsa_executable_create_alt(
|
||||
HSA_PROFILE_FULL, HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, NULL, executable);
|
||||
CHECK_STATUS("Error in creating executable object", status);
|
||||
|
||||
// Load code object.
|
||||
status = hsa_api_.hsa_executable_load_agent_code_object(*executable, agent_info->dev_id,
|
||||
code_obj_rdr, NULL, NULL);
|
||||
CHECK_STATUS("Error in loading executable object", status);
|
||||
|
||||
// Freeze executable.
|
||||
status = hsa_api_.hsa_executable_freeze(*executable, "");
|
||||
CHECK_STATUS("Error in freezing executable object", status);
|
||||
|
||||
// Get symbol handle.
|
||||
hsa_executable_symbol_t kernelSymbol;
|
||||
status = hsa_api_.hsa_executable_get_symbol(*executable, NULL, kernel_name, agent_info->dev_id, 0,
|
||||
&kernelSymbol);
|
||||
CHECK_STATUS("Error in looking up kernel symbol", status);
|
||||
|
||||
// Update output parameter
|
||||
*code_desc = kernelSymbol;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Print the various fields of Hsa Gpu Agents
|
||||
bool HsaRsrcFactory::PrintGpuAgents(const std::string& header) {
|
||||
std::cout << std::flush;
|
||||
std::clog << header << " :" << std::endl;
|
||||
|
||||
const AgentInfo* agent_info;
|
||||
int size = uint32_t(gpu_list_.size());
|
||||
for (int idx = 0; idx < size; idx++) {
|
||||
agent_info = gpu_list_[idx];
|
||||
|
||||
std::clog << "> agent[" << idx << "] :" << std::endl;
|
||||
std::clog << ">> Name : " << agent_info->name << std::endl;
|
||||
std::clog << ">> APU : " << agent_info->is_apu << std::endl;
|
||||
std::clog << ">> HSAIL profile : " << agent_info->profile << std::endl;
|
||||
std::clog << ">> Max Wave Size : " << agent_info->max_wave_size << std::endl;
|
||||
std::clog << ">> Max Queue Size : " << agent_info->max_queue_size << std::endl;
|
||||
std::clog << ">> CU number : " << agent_info->cu_num << std::endl;
|
||||
std::clog << ">> Waves per CU : " << agent_info->waves_per_cu << std::endl;
|
||||
std::clog << ">> SIMDs per CU : " << agent_info->simds_per_cu << std::endl;
|
||||
std::clog << ">> SE number : " << agent_info->se_num << std::endl;
|
||||
std::clog << ">> Shader Arrays per SE : " << agent_info->shader_arrays_per_se << std::endl;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Writes one CMD_SLOT_SIZE_B-byte packet into the queue and rings the doorbell.
// @param queue Queue to submit to
// @param packet Pointer to CMD_SLOT_SIZE_B bytes of packet data
// @return uint64_t Write index that was used for the packet
uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet) {
  const uint32_t slot_bytes = CMD_SLOT_SIZE_B;

  // Advance the command queue: take the current write index and publish the next one.
  const uint64_t write_idx = hsa_api_.hsa_queue_load_write_index_relaxed(queue);
  hsa_api_.hsa_queue_store_write_index_relaxed(queue, write_idx + 1);

  // Yield until the distance between the write and read indices is below the queue size.
  for (;;) {
    const uint64_t pending = write_idx - hsa_api_.hsa_queue_load_read_index_relaxed(queue);
    if (pending < queue->size) break;
    sched_yield();
  }

  // Locate the destination slot inside the ring buffer.
  const uint32_t slot_idx = (uint32_t)(write_idx % queue->size);
  const uintptr_t slot_addr = (uintptr_t)(queue->base_address) + (slot_idx * slot_bytes);
  uint32_t* dst_words = reinterpret_cast<uint32_t*>(slot_addr);
  const uint32_t* src_words = reinterpret_cast<const uint32_t*>(packet);

  // Copy everything except the first dword, then overwrite the AQL invalid header
  // (first dword) last with release ordering. This prevents the slot from being
  // read until it is fully written.
  memcpy(dst_words + 1, src_words + 1, slot_bytes - sizeof(uint32_t));
  reinterpret_cast<std::atomic<uint32_t>*>(dst_words)->store(src_words[0],
                                                             std::memory_order_release);

  // Ring the doorbell.
  hsa_api_.hsa_signal_store_relaxed(queue->doorbell_signal, write_idx);

  return write_idx;
}
|
||||
|
||||
// Submits a buffer of back-to-back packets, one CMD_SLOT_SIZE_B-byte slot at a time.
// Aborts if size_bytes is not a multiple of the slot size.
// @param queue Queue to submit to
// @param packet Start of the packet buffer
// @param size_bytes Size of the buffer in bytes
// @return uint64_t Write index of the last submitted packet, 0 if the buffer is empty
uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet, size_t size_bytes) {
  const uint32_t slot_bytes = CMD_SLOT_SIZE_B;
  if (size_bytes % slot_bytes != 0) {
    fprintf(stderr, "HsaRsrcFactory::Submit: Bad packet size %zx\n", size_bytes);
    abort();
  }

  const char* cursor = static_cast<const char*>(packet);
  const char* const stop = cursor + size_bytes;
  uint64_t last_write_idx = 0;
  while (cursor < stop) {
    last_write_idx = Submit(queue, cursor);
    cursor += slot_bytes;
  }

  return last_write_idx;
}
|
||||
|
||||
const char* HsaRsrcFactory::GetKernelName(uint64_t addr) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
const auto it = symbols_map_->find(addr);
|
||||
if (it == symbols_map_->end()) {
|
||||
fprintf(stderr, "HsaRsrcFactory::kernel addr (0x%lx) is not found\n", addr);
|
||||
abort();
|
||||
}
|
||||
return strdup(it->second);
|
||||
}
|
||||
|
||||
// Turns on executable tracking: sets the tracking flag and replaces the
// hsa_executable_freeze entry of the given API table with the interceptor.
// Both updates happen while holding the factory mutex.
void HsaRsrcFactory::EnableExecutableTracking(HsaApiTable* table) {
  const std::lock_guard<mutex_t> guard(mutex_);
  executable_tracking_on_ = true;
  table->core_->hsa_executable_freeze_fn = hsa_executable_freeze_interceptor;
}
|
||||
|
||||
// Symbol iteration callback: records kernel symbols in symbols_map_, keyed by
// kernel object address. Non-kernel symbols are ignored. Any failing HSA query
// aborts through CHECK_STATUS. Always returns HSA_STATUS_SUCCESS.
hsa_status_t HsaRsrcFactory::executable_symbols_cb(hsa_executable_t exec,
                                                   hsa_executable_symbol_t symbol, void* data) {
  hsa_symbol_kind_t kind = (hsa_symbol_kind_t)0;
  hsa_status_t status =
      hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &kind);
  CHECK_STATUS("Error in getting symbol info", status);

  // Only kernels are tracked.
  if (kind != HSA_SYMBOL_KIND_KERNEL) return HSA_STATUS_SUCCESS;

  uint64_t kernel_object = 0;
  status = hsa_api_.hsa_executable_symbol_get_info(
      symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &kernel_object);
  CHECK_STATUS("Error in getting kernel object", status);

  uint32_t name_len = 0;
  status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH,
                                                   &name_len);
  CHECK_STATUS("Error in getting name len", status);

  // One extra byte for the terminator written below.
  char* kernel_name = new char[name_len + 1];
  status =
      hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME, kernel_name);
  CHECK_STATUS("Error in getting kernel name", status);
  kernel_name[name_len] = 0;

  // If the address is already present, free the old name and keep the new one.
  const auto inserted = symbols_map_->insert({kernel_object, kernel_name});
  if (!inserted.second) {
    delete[] inserted.first->second;
    inserted.first->second = kernel_name;
  }
  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Replacement for hsa_executable_freeze: under the factory mutex, creates the
// symbol map on first use, records the executable's symbols through
// executable_symbols_cb, then forwards to the real hsa_executable_freeze and
// returns its status. A failing symbol iteration aborts through CHECK_STATUS.
hsa_status_t HsaRsrcFactory::hsa_executable_freeze_interceptor(hsa_executable_t executable,
                                                               const char* options) {
  std::lock_guard<mutex_t> guard(mutex_);
  if (!symbols_map_) symbols_map_ = new symbols_map_t;

  const hsa_status_t iterate_status =
      hsa_api_.hsa_executable_iterate_symbols(executable, executable_symbols_cb, NULL);
  CHECK_STATUS("Error in iterating executable symbols", iterate_status);

  return hsa_api_.hsa_executable_freeze(executable, options);
}
|
||||
|
||||
void HsaRsrcFactory::DumpHandles(FILE* file) {
|
||||
auto beg = agent_map_.begin();
|
||||
auto end = agent_map_.end();
|
||||
for (auto it = beg; it != end; ++it) {
|
||||
const AgentInfo* agent_info = it->second;
|
||||
fprintf(file, "0x%lx agent %s\n", agent_info->dev_id.handle,
|
||||
(agent_info->dev_type == HSA_DEVICE_TYPE_CPU) ? "cpu" : "gpu");
|
||||
if (agent_info->cpu_pool.handle != 0)
|
||||
fprintf(file, "0x%lx pool cpu\n", agent_info->cpu_pool.handle);
|
||||
if (agent_info->kern_arg_pool.handle != 0)
|
||||
fprintf(file, "0x%lx pool cpu kernarg\n", agent_info->kern_arg_pool.handle);
|
||||
if (agent_info->gpu_pool.handle != 0)
|
||||
fprintf(file, "0x%lx pool gpu%u\n", agent_info->gpu_pool.handle, agent_info->dev_index);
|
||||
}
|
||||
fflush(file);
|
||||
}
|
||||
|
||||
std::atomic<HsaRsrcFactory*> HsaRsrcFactory::instance_{};
|
||||
HsaRsrcFactory::mutex_t HsaRsrcFactory::mutex_;
|
||||
HsaRsrcFactory::timestamp_t HsaRsrcFactory::timeout_ns_ = HsaTimer::TIMESTAMP_MAX;
|
||||
hsa_pfn_t HsaRsrcFactory::hsa_api_{};
|
||||
bool HsaRsrcFactory::executable_tracking_on_ = false;
|
||||
HsaRsrcFactory::symbols_map_t* HsaRsrcFactory::symbols_map_ = NULL;
|
||||
|
||||
} // namespace util
|
||||
@@ -1,520 +0,0 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef SRC_UTIL_HSA_RSRC_FACTORY_H_
|
||||
#define SRC_UTIL_HSA_RSRC_FACTORY_H_
|
||||
|
||||
#include <hsa.h>
|
||||
#include <hsa_api_trace.h>
|
||||
#include <hsa_ext_amd.h>
|
||||
#include <hsa_ext_finalize.h>
|
||||
#include <hsa_ven_amd_aqlprofile.h>
|
||||
#include <hsa_ven_amd_loader.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#define HSA_ARGUMENT_ALIGN_BYTES 16
|
||||
#define HSA_QUEUE_ALIGN_BYTES 64
|
||||
#define HSA_PACKET_ALIGN_BYTES 64
|
||||
|
||||
// Aborts the process when `status` is not HSA_STATUS_SUCCESS, after printing
// "<msg>: <HSA status string>" (or "<unknown error>") to stdout.
// `status` is evaluated exactly once, so it is safe to pass a call expression.
#define CHECK_STATUS(msg, status)                                   \
  do {                                                              \
    const auto check_status_value_ = (status);                      \
    if (check_status_value_ != HSA_STATUS_SUCCESS) {                \
      const char* emsg = 0;                                         \
      hsa_status_string(check_status_value_, &emsg);                \
      printf("%s: %s\n", msg, emsg ? emsg : "<unknown error>");     \
      abort();                                                      \
    }                                                               \
  } while (0)
|
||||
|
||||
// Aborts the process when `status` is not HSA_STATUS_INFO_BREAK, after printing
// "<msg>: <HSA status string>" (or "<unknown error>") to stdout.
// `status` is evaluated exactly once, so it is safe to pass a call expression.
#define CHECK_ITER_STATUS(msg, status)                              \
  do {                                                              \
    const auto check_iter_status_value_ = (status);                 \
    if (check_iter_status_value_ != HSA_STATUS_INFO_BREAK) {        \
      const char* emsg = 0;                                         \
      hsa_status_string(check_iter_status_value_, &emsg);           \
      printf("%s: %s\n", msg, emsg ? emsg : "<unknown error>");     \
      abort();                                                      \
    }                                                               \
  } while (0)
|
||||
|
||||
namespace util {
|
||||
// Memory page size in bytes (4 KiB) and the mask selecting the offset within a page.
static const size_t MEM_PAGE_BYTES = static_cast<size_t>(1) << 12;
static const size_t MEM_PAGE_MASK = MEM_PAGE_BYTES - 1;
|
||||
// Integer type of the opaque handle stored inside hsa_agent_t.
using hsa_agent_handle_t = decltype(hsa_agent_t::handle);
|
||||
|
||||
struct hsa_pfn_t {
|
||||
decltype(hsa_init)* hsa_init;
|
||||
decltype(hsa_shut_down)* hsa_shut_down;
|
||||
decltype(hsa_agent_get_info)* hsa_agent_get_info;
|
||||
decltype(hsa_iterate_agents)* hsa_iterate_agents;
|
||||
|
||||
decltype(hsa_queue_create)* hsa_queue_create;
|
||||
decltype(hsa_queue_destroy)* hsa_queue_destroy;
|
||||
decltype(hsa_queue_load_write_index_relaxed)* hsa_queue_load_write_index_relaxed;
|
||||
decltype(hsa_queue_store_write_index_relaxed)* hsa_queue_store_write_index_relaxed;
|
||||
decltype(hsa_queue_load_read_index_relaxed)* hsa_queue_load_read_index_relaxed;
|
||||
|
||||
decltype(hsa_signal_create)* hsa_signal_create;
|
||||
decltype(hsa_signal_destroy)* hsa_signal_destroy;
|
||||
decltype(hsa_signal_load_relaxed)* hsa_signal_load_relaxed;
|
||||
decltype(hsa_signal_store_relaxed)* hsa_signal_store_relaxed;
|
||||
decltype(hsa_signal_wait_scacquire)* hsa_signal_wait_scacquire;
|
||||
decltype(hsa_signal_store_screlease)* hsa_signal_store_screlease;
|
||||
|
||||
decltype(hsa_code_object_reader_create_from_file)* hsa_code_object_reader_create_from_file;
|
||||
decltype(hsa_executable_create_alt)* hsa_executable_create_alt;
|
||||
decltype(hsa_executable_load_agent_code_object)* hsa_executable_load_agent_code_object;
|
||||
decltype(hsa_executable_freeze)* hsa_executable_freeze;
|
||||
decltype(hsa_executable_get_symbol)* hsa_executable_get_symbol;
|
||||
decltype(hsa_executable_symbol_get_info)* hsa_executable_symbol_get_info;
|
||||
decltype(hsa_executable_iterate_symbols)* hsa_executable_iterate_symbols;
|
||||
|
||||
decltype(hsa_system_get_info)* hsa_system_get_info;
|
||||
decltype(hsa_system_get_major_extension_table)* hsa_system_get_major_extension_table;
|
||||
|
||||
decltype(hsa_amd_agent_iterate_memory_pools)* hsa_amd_agent_iterate_memory_pools;
|
||||
decltype(hsa_amd_memory_pool_get_info)* hsa_amd_memory_pool_get_info;
|
||||
decltype(hsa_amd_memory_pool_allocate)* hsa_amd_memory_pool_allocate;
|
||||
decltype(hsa_amd_agents_allow_access)* hsa_amd_agents_allow_access;
|
||||
decltype(hsa_amd_memory_async_copy)* hsa_amd_memory_async_copy;
|
||||
decltype(hsa_amd_memory_async_copy_rect)* hsa_amd_memory_async_copy_rect;
|
||||
|
||||
decltype(hsa_amd_signal_async_handler)* hsa_amd_signal_async_handler;
|
||||
decltype(hsa_amd_profiling_set_profiler_enabled)* hsa_amd_profiling_set_profiler_enabled;
|
||||
decltype(hsa_amd_profiling_get_async_copy_time)* hsa_amd_profiling_get_async_copy_time;
|
||||
decltype(hsa_amd_profiling_get_dispatch_time)* hsa_amd_profiling_get_dispatch_time;
|
||||
};
|
||||
|
||||
// Encapsulates information about a Hsa Agent such as its
|
||||
// handle, name, max queue size, max wavefront size, etc.
|
||||
struct AgentInfo {
|
||||
// Handle of Agent
|
||||
hsa_agent_t dev_id;
|
||||
|
||||
// Agent type - Cpu = 0, Gpu = 1 or Dsp = 2
|
||||
uint32_t dev_type;
|
||||
|
||||
// APU flag
|
||||
bool is_apu;
|
||||
|
||||
// Agent system index
|
||||
uint32_t dev_index;
|
||||
|
||||
// GFXIP name
|
||||
char gfxip[64];
|
||||
|
||||
// Name of Agent whose length is less than 64
|
||||
char name[64];
|
||||
|
||||
// Max size of Wavefront size
|
||||
uint32_t max_wave_size;
|
||||
|
||||
// Max size of Queue buffer
|
||||
uint32_t max_queue_size;
|
||||
|
||||
// Hsail profile supported by agent
|
||||
hsa_profile_t profile;
|
||||
|
||||
// CPU/GPU/kern-arg memory pools
|
||||
hsa_amd_memory_pool_t cpu_pool;
|
||||
hsa_amd_memory_pool_t gpu_pool;
|
||||
hsa_amd_memory_pool_t kern_arg_pool;
|
||||
|
||||
// The number of compute unit available in the agent.
|
||||
uint32_t cu_num;
|
||||
|
||||
// Maximum number of waves possible in a Compute Unit.
|
||||
uint32_t waves_per_cu;
|
||||
|
||||
// Number of SIMD's per compute unit CU
|
||||
uint32_t simds_per_cu;
|
||||
|
||||
// Number of Shader Engines (SE) in Gpu
|
||||
uint32_t se_num;
|
||||
|
||||
// Number of Shader Arrays Per Shader Engines in Gpu
|
||||
uint32_t shader_arrays_per_se;
|
||||
|
||||
// SGPR/VGPR block sizes
|
||||
uint32_t sgpr_block_dflt;
|
||||
uint32_t sgpr_block_size;
|
||||
uint32_t vgpr_block_size;
|
||||
};
|
||||
|
||||
// HSA timer class
|
||||
// Provides current HSA timestampa and system-clock/ns conversion API
|
||||
class HsaTimer {
|
||||
public:
|
||||
typedef uint64_t timestamp_t;
|
||||
static const timestamp_t TIMESTAMP_MAX = UINT64_MAX;
|
||||
typedef long double freq_t;
|
||||
|
||||
enum time_id_t { TIME_ID_CLOCK_REALTIME = 0, TIME_ID_CLOCK_MONOTONIC = 1, TIME_ID_NUMBER };
|
||||
|
||||
HsaTimer(const hsa_pfn_t* hsa_api) : hsa_api_(hsa_api) {
|
||||
timestamp_t sysclock_hz = 0;
|
||||
hsa_status_t status =
|
||||
hsa_api_->hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz);
|
||||
CHECK_STATUS("hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY)", status);
|
||||
sysclock_factor_ = (freq_t)1000000000 / (freq_t)sysclock_hz;
|
||||
}
|
||||
|
||||
// Methods for system-clock/ns conversion
|
||||
timestamp_t sysclock_to_ns(const timestamp_t& sysclock) const {
|
||||
return timestamp_t((freq_t)sysclock * sysclock_factor_);
|
||||
}
|
||||
timestamp_t ns_to_sysclock(const timestamp_t& time) const {
|
||||
return timestamp_t((freq_t)time / sysclock_factor_);
|
||||
}
|
||||
|
||||
// Method for timespec/ns conversion
|
||||
static timestamp_t timespec_to_ns(const timespec& time) {
|
||||
return ((timestamp_t)time.tv_sec * 1000000000) + time.tv_nsec;
|
||||
}
|
||||
|
||||
// Return timestamp in 'ns'
|
||||
timestamp_t timestamp_ns() const {
|
||||
timestamp_t sysclock;
|
||||
hsa_status_t status = hsa_api_->hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, &sysclock);
|
||||
CHECK_STATUS("hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP)", status);
|
||||
return sysclock_to_ns(sysclock);
|
||||
}
|
||||
|
||||
// Return time in 'ns'
|
||||
static timestamp_t clocktime_ns(clockid_t clock_id) {
|
||||
timespec time;
|
||||
clock_gettime(clock_id, &time);
|
||||
return timespec_to_ns(time);
|
||||
}
|
||||
|
||||
// Return pair of correlated values of profiling timestamp and time with
|
||||
// correlation error for a given time ID and number of iterations
|
||||
void correlated_pair_ns(time_id_t time_id, uint32_t iters, timestamp_t* timestamp_v,
|
||||
timestamp_t* time_v, timestamp_t* error_v) const {
|
||||
clockid_t clock_id = 0;
|
||||
switch (time_id) {
|
||||
case TIME_ID_CLOCK_REALTIME:
|
||||
clock_id = CLOCK_REALTIME;
|
||||
break;
|
||||
case TIME_ID_CLOCK_MONOTONIC:
|
||||
clock_id = CLOCK_MONOTONIC;
|
||||
break;
|
||||
default:
|
||||
CHECK_STATUS("internal error: invalid time_id", HSA_STATUS_ERROR);
|
||||
}
|
||||
|
||||
std::vector<timestamp_t> ts_vec(iters);
|
||||
std::vector<timespec> tm_vec(iters);
|
||||
const uint32_t steps = iters - 1;
|
||||
|
||||
for (uint32_t i = 0; i < iters; ++i) {
|
||||
hsa_api_->hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, &ts_vec[i]);
|
||||
clock_gettime(clock_id, &tm_vec[i]);
|
||||
}
|
||||
|
||||
const timestamp_t ts_base = sysclock_to_ns(ts_vec.front());
|
||||
const timestamp_t tm_base = timespec_to_ns(tm_vec.front());
|
||||
const timestamp_t error = (ts_vec.back() - ts_vec.front()) / (2 * steps);
|
||||
|
||||
timestamp_t ts_accum = 0;
|
||||
timestamp_t tm_accum = 0;
|
||||
for (uint32_t i = 0; i < iters; ++i) {
|
||||
ts_accum += (ts_vec[i] - ts_base);
|
||||
tm_accum += (timespec_to_ns(tm_vec[i]) - tm_base);
|
||||
}
|
||||
|
||||
*timestamp_v = (ts_accum / iters) + ts_base + error;
|
||||
*time_v = (tm_accum / iters) + tm_base;
|
||||
*error_v = error;
|
||||
}
|
||||
|
||||
private:
|
||||
// Timestamp frequency factor
|
||||
freq_t sysclock_factor_;
|
||||
// HSA API table
|
||||
const hsa_pfn_t* const hsa_api_;
|
||||
};
|
||||
|
||||
class HsaRsrcFactory {
|
||||
public:
|
||||
static const size_t CMD_SLOT_SIZE_B = 0x40;
|
||||
typedef std::recursive_mutex mutex_t;
|
||||
typedef HsaTimer::timestamp_t timestamp_t;
|
||||
|
||||
static HsaRsrcFactory* Create(bool initialize_hsa = true) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
HsaRsrcFactory* obj = instance_.load(std::memory_order_relaxed);
|
||||
if (obj == NULL) {
|
||||
obj = new HsaRsrcFactory(initialize_hsa);
|
||||
instance_.store(obj, std::memory_order_release);
|
||||
}
|
||||
return obj;
|
||||
}
|
||||
|
||||
static HsaRsrcFactory& Instance() {
|
||||
HsaRsrcFactory* obj = instance_.load(std::memory_order_acquire);
|
||||
if (obj == NULL) obj = Create(false);
|
||||
hsa_status_t status = (obj != NULL) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
|
||||
CHECK_STATUS("HsaRsrcFactory::Instance() failed", status);
|
||||
return *obj;
|
||||
}
|
||||
|
||||
static void Destroy() {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
if (instance_) delete instance_.load();
|
||||
instance_ = NULL;
|
||||
}
|
||||
|
||||
// Return system agent info
|
||||
const AgentInfo* GetAgentInfo(const hsa_agent_t agent);
|
||||
|
||||
// Get the count of Hsa Gpu Agents available on the platform
|
||||
// @return uint32_t Number of Gpu agents on platform
|
||||
uint32_t GetCountOfGpuAgents();
|
||||
|
||||
// Get the count of Hsa Cpu Agents available on the platform
|
||||
// @return uint32_t Number of Cpu agents on platform
|
||||
uint32_t GetCountOfCpuAgents();
|
||||
|
||||
// Get the AgentInfo handle of a Gpu device
|
||||
// @param idx Gpu Agent at specified index
|
||||
// @param agent_info Output parameter updated with AgentInfo
|
||||
// @return bool true if successful, false otherwise
|
||||
bool GetGpuAgentInfo(uint32_t idx, const AgentInfo** agent_info);
|
||||
|
||||
// Get the AgentInfo handle of a Cpu device
|
||||
// @param idx Cpu Agent at specified index
|
||||
// @param agent_info Output parameter updated with AgentInfo
|
||||
// @return bool true if successful, false otherwise
|
||||
bool GetCpuAgentInfo(uint32_t idx, const AgentInfo** agent_info);
|
||||
|
||||
// Create a Queue object and return its handle. The queue object is expected
|
||||
// to support user requested number of Aql dispatch packets.
|
||||
// @param agent_info Gpu Agent on which to create a queue object
|
||||
// @param num_Pkts Number of packets to be held by queue
|
||||
// @param queue Output parameter updated with handle of queue object
|
||||
// @return bool true if successful, false otherwise
|
||||
bool CreateQueue(const AgentInfo* agent_info, uint32_t num_pkts, hsa_queue_t** queue);
|
||||
|
||||
// Create a Signal object and return its handle.
|
||||
// @param value Initial value of signal object
|
||||
// @param signal Output parameter updated with handle of signal object
|
||||
// @return bool true if successful, false otherwise
|
||||
bool CreateSignal(uint32_t value, hsa_signal_t* signal);
|
||||
|
||||
// Allocate local GPU memory
|
||||
// @param agent_info Agent from whose memory region to allocate
|
||||
// @param size Size of memory in terms of bytes
|
||||
// @return uint8_t* Pointer to buffer, null if allocation fails.
|
||||
uint8_t* AllocateLocalMemory(const AgentInfo* agent_info, size_t size);
|
||||
|
||||
// Allocate memory tp pass kernel parameters
|
||||
// Memory is alocated accessible for all CPU agents and for GPU given by AgentInfo parameter.
|
||||
// @param agent_info Agent from whose memory region to allocate
|
||||
// @param size Size of memory in terms of bytes
|
||||
// @return uint8_t* Pointer to buffer, null if allocation fails.
|
||||
uint8_t* AllocateKernArgMemory(const AgentInfo* agent_info, size_t size);
|
||||
|
||||
// Allocate system memory accessible from both CPU and GPU
|
||||
// Memory is alocated accessible to all CPU agents and AgentInfo parameter is ignored.
|
||||
// @param agent_info Agent from whose memory region to allocate
|
||||
// @param size Size of memory in terms of bytes
|
||||
// @return uint8_t* Pointer to buffer, null if allocation fails.
|
||||
uint8_t* AllocateSysMemory(const AgentInfo* agent_info, size_t size);
|
||||
|
||||
// Allocate memory for command buffer.
|
||||
// @param agent_info Agent from whose memory region to allocate
|
||||
// @param size Size of memory in terms of bytes
|
||||
// @return uint8_t* Pointer to buffer, null if allocation fails.
|
||||
uint8_t* AllocateCmdMemory(const AgentInfo* agent_info, size_t size);
|
||||
|
||||
// Wait signal
|
||||
hsa_signal_value_t SignalWait(const hsa_signal_t& signal,
|
||||
const hsa_signal_value_t& signal_value) const;
|
||||
|
||||
// Wait signal with signal value restore
|
||||
void SignalWaitRestore(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const;
|
||||
|
||||
// Copy data from GPU to host memory
|
||||
bool Memcpy(const hsa_agent_t& agent, void* dst, const void* src, size_t size);
|
||||
bool Memcpy(const AgentInfo* agent_info, void* dst, const void* src, size_t size);
|
||||
|
||||
// Memory free method
|
||||
static bool FreeMemory(void* ptr);
|
||||
|
||||
// Loads an Assembled Brig file and Finalizes it into Device Isa
|
||||
// @param agent_info Gpu device for which to finalize
|
||||
// @param brig_path File path of the Assembled Brig file
|
||||
// @param kernel_name Name of the kernel to finalize
|
||||
// @param code_desc Handle of finalized Code Descriptor that could
|
||||
// be used to submit for execution
|
||||
// @return true if successful, false otherwise
|
||||
bool LoadAndFinalize(const AgentInfo* agent_info, const char* brig_path, const char* kernel_name,
|
||||
hsa_executable_t* hsa_exec, hsa_executable_symbol_t* code_desc);
|
||||
|
||||
// Print the various fields of Hsa Gpu Agents
|
||||
bool PrintGpuAgents(const std::string& header);
|
||||
|
||||
// Submit AQL packet to given queue
|
||||
static uint64_t Submit(hsa_queue_t* queue, const void* packet);
|
||||
static uint64_t Submit(hsa_queue_t* queue, const void* packet, size_t size_bytes);
|
||||
|
||||
// Enable executables loading tracking
|
||||
static bool IsExecutableTracking() { return executable_tracking_on_; }
|
||||
static void EnableExecutableTracking(HsaApiTable* table);
|
||||
static const char* GetKernelName(uint64_t addr);
|
||||
|
||||
// Initialize HSA API table
|
||||
void static InitHsaApiTable(HsaApiTable* table);
|
||||
static const hsa_pfn_t* HsaApi() { return &hsa_api_; }
|
||||
|
||||
// Return AqlProfile API table
|
||||
typedef hsa_ven_amd_aqlprofile_pfn_t aqlprofile_pfn_t;
|
||||
const aqlprofile_pfn_t* AqlProfileApi() const { return &aqlprofile_api_; }
|
||||
|
||||
// Return Loader API table
|
||||
const hsa_ven_amd_loader_1_00_pfn_t* LoaderApi() const { return &loader_api_; }
|
||||
|
||||
// Methods for system-clock/ns conversion and timestamp in 'ns'
|
||||
timestamp_t SysclockToNs(const timestamp_t& sysclock) const {
|
||||
return timer_->sysclock_to_ns(sysclock);
|
||||
}
|
||||
timestamp_t NsToSysclock(const timestamp_t& time) const { return timer_->ns_to_sysclock(time); }
|
||||
timestamp_t TimestampNs() const { return timer_->timestamp_ns(); }
|
||||
|
||||
timestamp_t GetSysTimeout() const { return timeout_; }
|
||||
static timestamp_t GetTimeoutNs() { return timeout_ns_; }
|
||||
static void SetTimeoutNs(const timestamp_t& time) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
timeout_ns_ = time;
|
||||
if (instance_ != NULL) Instance().timeout_ = Instance().timer_->ns_to_sysclock(time);
|
||||
}
|
||||
|
||||
void CorrelateTime(HsaTimer::time_id_t time_id, uint32_t iters) {
|
||||
timestamp_t timestamp_v = 0;
|
||||
timestamp_t time_v = 0;
|
||||
timestamp_t error_v = 0;
|
||||
timer_->correlated_pair_ns(time_id, iters, ×tamp_v, &time_v, &error_v);
|
||||
time_shift_[time_id] = time_v - timestamp_v;
|
||||
time_error_[time_id] = error_v;
|
||||
}
|
||||
|
||||
hsa_status_t GetTime(uint32_t time_id, timestamp_t value, uint64_t* time) {
|
||||
if (time_id >= HsaTimer::TIME_ID_NUMBER) return HSA_STATUS_ERROR;
|
||||
*time = value + time_shift_[time_id];
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
hsa_status_t GetTimestamp(uint32_t time_id, uint64_t value, timestamp_t* timestamp) {
|
||||
if (time_id >= HsaTimer::TIME_ID_NUMBER) return HSA_STATUS_ERROR;
|
||||
*timestamp = value - time_shift_[time_id];
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
void DumpHandles(FILE* output_file);
|
||||
|
||||
private:
|
||||
// System agents iterating callback
|
||||
static hsa_status_t GetHsaAgentsCallback(hsa_agent_t agent, void* data);
|
||||
|
||||
// Callback function to find and bind kernarg region of an agent
|
||||
static hsa_status_t FindMemRegionsCallback(hsa_region_t region, void* data);
|
||||
|
||||
// Load AQL profile HSA extension library directly
|
||||
static hsa_status_t LoadAqlProfileLib(aqlprofile_pfn_t* api);
|
||||
|
||||
// Constructor of the class. Will initialize the Hsa Runtime and
|
||||
// query the system topology to get the list of Cpu and Gpu devices
|
||||
explicit HsaRsrcFactory(bool initialize_hsa);
|
||||
|
||||
// Destructor of the class
|
||||
~HsaRsrcFactory();
|
||||
|
||||
// Add an instance of AgentInfo representing a Hsa Gpu agent
|
||||
const AgentInfo* AddAgentInfo(const hsa_agent_t agent);
|
||||
|
||||
// To mmap command buffer memory
|
||||
static const bool CMD_MEMORY_MMAP = false;
|
||||
|
||||
// HSA was initialized
|
||||
const bool initialize_hsa_;
|
||||
|
||||
static std::atomic<HsaRsrcFactory*> instance_;
|
||||
static mutex_t mutex_;
|
||||
|
||||
// Used to maintain a list of Hsa Gpu Agent Info
|
||||
std::vector<const AgentInfo*> gpu_list_;
|
||||
std::vector<hsa_agent_t> gpu_agents_;
|
||||
|
||||
// Used to maintain a list of Hsa Cpu Agent Info
|
||||
std::vector<const AgentInfo*> cpu_list_;
|
||||
std::vector<hsa_agent_t> cpu_agents_;
|
||||
|
||||
// System agents map
|
||||
std::map<hsa_agent_handle_t, const AgentInfo*> agent_map_;
|
||||
|
||||
// Executables loading tracking
|
||||
typedef std::map<uint64_t, const char*> symbols_map_t;
|
||||
static symbols_map_t* symbols_map_;
|
||||
static bool executable_tracking_on_;
|
||||
static hsa_status_t hsa_executable_freeze_interceptor(hsa_executable_t executable,
|
||||
const char* options);
|
||||
static hsa_status_t executable_symbols_cb(hsa_executable_t exec, hsa_executable_symbol_t symbol,
|
||||
void* data);
|
||||
|
||||
// HSA runtime API table
|
||||
static hsa_pfn_t hsa_api_;
|
||||
|
||||
// AqlProfile API table
|
||||
aqlprofile_pfn_t aqlprofile_api_;
|
||||
|
||||
// Loader API table
|
||||
hsa_ven_amd_loader_1_00_pfn_t loader_api_;
|
||||
|
||||
// System timeout, ns
|
||||
static timestamp_t timeout_ns_;
|
||||
// System timeout, sysclock
|
||||
timestamp_t timeout_;
|
||||
|
||||
// HSA timer
|
||||
HsaTimer* timer_;
|
||||
|
||||
// Time shift array to support time conversion
|
||||
timestamp_t time_shift_[HsaTimer::TIME_ID_NUMBER];
|
||||
timestamp_t time_error_[HsaTimer::TIME_ID_NUMBER];
|
||||
|
||||
// CPU/kern-arg memory pools
|
||||
hsa_amd_memory_pool_t* cpu_pool_;
|
||||
hsa_amd_memory_pool_t* kern_arg_pool_;
|
||||
};
|
||||
|
||||
} // namespace util
|
||||
|
||||
#endif // SRC_UTIL_HSA_RSRC_FACTORY_H_
|
||||
Ссылка в новой задаче
Block a user