@@ -34,6 +34,7 @@ THE SOFTWARE.
|
||||
#include <sys/syscall.h>
|
||||
|
||||
#include "core/loader.h"
|
||||
#include "proxy/tracker.h"
|
||||
#include "ext/hsa_rt_utils.hpp"
|
||||
#include "util/exception.h"
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
@@ -84,6 +85,21 @@ THE SOFTWARE.
|
||||
// Internal library methods
|
||||
//
|
||||
namespace roctracer {
|
||||
decltype(hsa_amd_memory_async_copy)* hsa_amd_memory_async_copy_fn;
|
||||
decltype(hsa_amd_memory_async_copy_rect)* hsa_amd_memory_async_copy_rect_fn;
|
||||
|
||||
namespace hsa_support {
|
||||
// callbacks table
|
||||
cb_table_t cb_table;
|
||||
// activity enabled
|
||||
bool enabled = false;;
|
||||
// Table of function pointers to HSA Core Runtime
|
||||
CoreApiTable CoreApiTable_saved{};
|
||||
// Table of function pointers to AMD extensions
|
||||
AmdExtTable AmdExtTable_saved{};
|
||||
// Table of function pointers to HSA Image Extension
|
||||
ImageExtTable ImageExtTable_saved{};
|
||||
}
|
||||
|
||||
roctracer_status_t GetExcStatus(const std::exception& e) {
|
||||
const util::exception* roctracer_exc_ptr = dynamic_cast<const util::exception*>(&e);
|
||||
@@ -276,7 +292,7 @@ CONSTRUCTOR_API void constructor() {
|
||||
}
|
||||
|
||||
// Library teardown hook: releases the HSA resource factory and the logger.
DESTRUCTOR_API void destructor() {
  // The resource factory is destroyed exactly once, through its
  // globally-qualified name; a second Destroy() call on it is redundant.
  ::util::HsaRsrcFactory::Destroy();
  util::Logger::Destroy();
}
|
||||
|
||||
@@ -350,6 +366,65 @@ void HCC_AsyncActivityCallback(uint32_t op_id, void* record, void* arg) {
|
||||
pool->Write(*record_ptr);
|
||||
}
|
||||
|
||||
// Handler registered for tracked async copies.
// @param value  signal value passed by the caller (not used here)
// @param arg    opaque pointer to the ::proxy::Tracker::entry_t of the copy
// @return always false
// Prints the begin/end values of the entry's record and the entry index.
bool hsa_async_copy_handler(hsa_signal_value_t value, void* arg) {
  ::proxy::Tracker::entry_t* const copy_entry = static_cast< ::proxy::Tracker::entry_t*>(arg);
  printf("%lu:%lu async-copy%lu\n",
         copy_entry->record->begin,
         copy_entry->record->end,
         copy_entry->index);
  return false;
}
|
||||
|
||||
// Replacement for hsa_amd_memory_async_copy installed in the AMD extension table.
// All arguments are forwarded unchanged to the saved runtime entry point
// (hsa_amd_memory_async_copy_fn); only the completion signal may be substituted.
//
// When hsa_support::enabled is false the call is a plain pass-through.
// When it is true, a tracker entry is allocated for the caller's completion
// signal and the copy is issued against the entry's own signal instead; on
// success the entry is armed with hsa_async_copy_handler, on failure it is
// deleted again.
// @return the status returned by the underlying runtime call
hsa_status_t hsa_amd_memory_async_copy_interceptor(
    void* dst, hsa_agent_t dst_agent, const void* src,
    hsa_agent_t src_agent, size_t size, uint32_t num_dep_signals,
    const hsa_signal_t* dep_signals, hsa_signal_t completion_signal)
{
  // Tracing is off: call straight through with the caller's signal.
  if (!hsa_support::enabled) {
    return hsa_amd_memory_async_copy_fn(dst, dst_agent, src,
                                        src_agent, size, num_dep_signals,
                                        dep_signals, completion_signal);
  }

  ::proxy::Tracker& copy_tracker = ::proxy::Tracker::Instance();
  ::proxy::Tracker::entry_t* const entry = copy_tracker.Alloc(hsa_agent_t{}, completion_signal);

  const hsa_status_t status = hsa_amd_memory_async_copy_fn(dst, dst_agent, src,
                                                           src_agent, size, num_dep_signals,
                                                           dep_signals, entry->signal);
  if (status != HSA_STATUS_SUCCESS) {
    // The copy was never submitted, so the entry is dropped.
    copy_tracker.Delete(entry);
    return status;
  }

  copy_tracker.EnableMemcopy(entry, hsa_async_copy_handler, reinterpret_cast<void*>(entry));
  return status;
}
|
||||
|
||||
// Replacement for hsa_amd_memory_async_copy_rect installed in the AMD extension table.
// All arguments are forwarded unchanged to the saved runtime entry point
// (hsa_amd_memory_async_copy_rect_fn); only the completion signal may be substituted.
//
// When hsa_support::enabled is false the call is a plain pass-through.
// When it is true, a tracker entry is allocated for the caller's completion
// signal and the copy is issued against the entry's own signal instead; on
// success the entry is armed with hsa_async_copy_handler, on failure it is
// deleted again.
// @return the status returned by the underlying runtime call
hsa_status_t hsa_amd_memory_async_copy_rect_interceptor(
    const hsa_pitched_ptr_t* dst, const hsa_dim3_t* dst_offset, const hsa_pitched_ptr_t* src,
    const hsa_dim3_t* src_offset, const hsa_dim3_t* range, hsa_agent_t copy_agent,
    hsa_amd_copy_direction_t dir, uint32_t num_dep_signals, const hsa_signal_t* dep_signals,
    hsa_signal_t completion_signal)
{
  // Tracing is off: call straight through with the caller's signal.
  if (!hsa_support::enabled) {
    return hsa_amd_memory_async_copy_rect_fn(dst, dst_offset, src,
                                             src_offset, range, copy_agent,
                                             dir, num_dep_signals, dep_signals,
                                             completion_signal);
  }

  ::proxy::Tracker& copy_tracker = ::proxy::Tracker::Instance();
  ::proxy::Tracker::entry_t* const entry = copy_tracker.Alloc(hsa_agent_t{}, completion_signal);

  const hsa_status_t status = hsa_amd_memory_async_copy_rect_fn(dst, dst_offset, src,
                                                                src_offset, range, copy_agent,
                                                                dir, num_dep_signals, dep_signals,
                                                                entry->signal);
  if (status != HSA_STATUS_SUCCESS) {
    // The copy was never submitted, so the entry is dropped.
    copy_tracker.Delete(entry);
    return status;
  }

  copy_tracker.EnableMemcopy(entry, hsa_async_copy_handler, reinterpret_cast<void*>(entry));
  return status;
}
|
||||
|
||||
util::Logger::mutex_t util::Logger::mutex_;
|
||||
util::Logger* util::Logger::instance_ = NULL;
|
||||
MemoryPool* memory_pool = NULL;
|
||||
@@ -359,18 +434,11 @@ memory_pool_mutex_t memory_pool_mutex;
|
||||
Loader::mutex_t Loader::mutex_;
|
||||
HipLoader* HipLoader::instance_;
|
||||
HccLoader* HccLoader::instance_;
|
||||
}
|
||||
|
||||
namespace hsa_support {
|
||||
// callbacks table
|
||||
cb_table_t cb_table;
|
||||
// Table of function pointers to HSA Core Runtime
|
||||
CoreApiTable CoreApiTable_saved{};
|
||||
// Table of function pointers to AMD extensions
|
||||
AmdExtTable AmdExtTable_saved{};
|
||||
// Table of function pointers to HSA Image Extension
|
||||
ImageExtTable ImageExtTable_saved{};
|
||||
}
|
||||
}
|
||||
proxy::Tracker* proxy::Tracker::instance_ = NULL;
|
||||
proxy::Tracker::mutex_t proxy::Tracker::glob_mutex_;
|
||||
proxy::Tracker::counter_t proxy::Tracker::counter_ = 0;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Public library methods
|
||||
@@ -507,9 +575,7 @@ static void roctracer_disable_callback_impl(
|
||||
uint32_t op)
|
||||
{
|
||||
switch (domain) {
|
||||
case ACTIVITY_DOMAIN_HSA_API: {
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HSA_API: break;
|
||||
case ACTIVITY_DOMAIN_HCC_OPS: break;
|
||||
case ACTIVITY_DOMAIN_HIP_API: {
|
||||
hipError_t hip_err = roctracer::HipLoader::Instance().RemoveApiCallback(op);
|
||||
@@ -593,7 +659,10 @@ static void roctracer_enable_activity_impl(
|
||||
{
|
||||
if (pool == NULL) pool = roctracer_default_pool();
|
||||
switch (domain) {
|
||||
case ACTIVITY_DOMAIN_HSA_API: break;
|
||||
case ACTIVITY_DOMAIN_HSA_API: {
|
||||
roctracer::hsa_support::enabled = true;
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HCC_OPS: {
|
||||
if (roctracer::HccLoader::GetRef() == NULL) {
|
||||
roctracer::HccLoader::Instance().InitActivityCallback((void*)roctracer::HCC_ActivityIdCallback,
|
||||
@@ -651,7 +720,10 @@ static void roctracer_disable_activity_impl(
|
||||
uint32_t op)
|
||||
{
|
||||
switch (domain) {
|
||||
case ACTIVITY_DOMAIN_HSA_API: break;
|
||||
case ACTIVITY_DOMAIN_HSA_API: {
|
||||
roctracer::hsa_support::enabled = false;
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HCC_OPS: {
|
||||
const bool succ = roctracer::HccLoader::Instance().EnableActivityCallback(op, false);
|
||||
if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HCC_OPS_ERR, "HCC::EnableActivityCallback(NULL) error domain(" << domain << ") op(" << op << ")");
|
||||
@@ -731,6 +803,14 @@ PUBLIC_API roctracer_status_t roctracer_set_properties(
|
||||
// Tool load entry point receiving the HSA API table.
// @param table               HSA API table; its AMD extension table is patched here
// @param runtime_version     not used in this function
// @param failed_tool_count   not used in this function
// @param failed_tool_names   not used in this function
// @return always true
// Registers the table with the tracer, turns on async-copy profiling, and
// swaps the two async-copy entry points for the tracer's interceptors.
PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count,
                       const char* const* failed_tool_names) {
  roctracer_set_properties(ACTIVITY_DOMAIN_HSA_API, static_cast<void*>(table));

  const hsa_status_t status = hsa_amd_profiling_async_copy_enable(true);
  if (status != HSA_STATUS_SUCCESS) EXC_ABORT(status, "hsa_amd_profiling_async_copy_enable");

  auto* const amd_ext = table->amd_ext_;

  // Remember the runtime's own implementations before overwriting the slots,
  // since the interceptors call through these saved pointers.
  roctracer::hsa_amd_memory_async_copy_fn = amd_ext->hsa_amd_memory_async_copy_fn;
  roctracer::hsa_amd_memory_async_copy_rect_fn = amd_ext->hsa_amd_memory_async_copy_rect_fn;

  amd_ext->hsa_amd_memory_async_copy_fn = roctracer::hsa_amd_memory_async_copy_interceptor;
  amd_ext->hsa_amd_memory_async_copy_rect_fn = roctracer::hsa_amd_memory_async_copy_rect_interceptor;

  return true;
}
|
||||
|
||||
|
||||
@@ -1,24 +1,26 @@
|
||||
/*
|
||||
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
|
||||
/**********************************************************************
|
||||
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
provided that the following conditions are met:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
• Redistributions of source code must retain the above copyright notice, this list of
|
||||
conditions and the following disclaimer.
|
||||
• Redistributions in binary form must reproduce the above copyright notice, this list of
|
||||
conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
|
||||
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
|
||||
SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
********************************************************************/
|
||||
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
|
||||
@@ -31,6 +33,7 @@ THE SOFTWARE.
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
@@ -41,11 +44,9 @@ THE SOFTWARE.
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#ifndef AQL_PROFILE_READ_API_ENABLE
|
||||
#define AQL_PROFILE_READ_API_ENABLE 0
|
||||
#endif
|
||||
#include "util/exception.h"
|
||||
#include "util/logger.h"
|
||||
|
||||
namespace roctracer {
|
||||
namespace util {
|
||||
|
||||
// Callback function to get the agents available in the system
|
||||
@@ -66,8 +67,7 @@ hsa_status_t HsaRsrcFactory::GetHsaAgentsCallback(hsa_agent_t agent, void* data)
|
||||
// returned. HSA_STATUS_SUCCESS is returned if no errors were encountered, but
|
||||
// no pool was found meeting the requirements. If an error is encountered, we
|
||||
// return that error.
|
||||
static hsa_status_t
|
||||
FindGlobalPool(hsa_amd_memory_pool_t pool, void* data, bool kern_arg) {
|
||||
static hsa_status_t FindGlobalPool(hsa_amd_memory_pool_t pool, void* data, bool kern_arg) {
|
||||
hsa_status_t err;
|
||||
hsa_amd_segment_t segment;
|
||||
uint32_t flag;
|
||||
@@ -76,21 +76,18 @@ FindGlobalPool(hsa_amd_memory_pool_t pool, void* data, bool kern_arg) {
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT,
|
||||
&segment);
|
||||
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment);
|
||||
CHECK_STATUS("hsa_amd_memory_pool_get_info", err);
|
||||
if (HSA_AMD_SEGMENT_GLOBAL != segment) {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
err = hsa_amd_memory_pool_get_info(pool,
|
||||
HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag);
|
||||
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag);
|
||||
CHECK_STATUS("hsa_amd_memory_pool_get_info", err);
|
||||
|
||||
uint32_t karg_st = flag & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT;
|
||||
|
||||
if ((karg_st == 0 && kern_arg) ||
|
||||
(karg_st != 0 && !kern_arg)) {
|
||||
if ((karg_st == 0 && kern_arg) || (karg_st != 0 && !kern_arg)) {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -111,59 +108,52 @@ hsa_status_t FindStandardPool(hsa_amd_memory_pool_t pool, void* data) {
|
||||
// Memory-pool callback selecting a kernel-argument pool: forwards to
// FindGlobalPool with the kern_arg flag set to true.
hsa_status_t FindKernArgPool(hsa_amd_memory_pool_t pool, void* data) {
  const bool want_kern_arg = true;
  return FindGlobalPool(pool, data, want_kern_arg);
}
|
||||
#if 0
|
||||
// Callback function to find and bind kernarg region of an agent
|
||||
hsa_status_t HsaRsrcFactory::FindMemRegionsCallback(hsa_region_t region, void* data) {
|
||||
hsa_region_global_flag_t flags;
|
||||
hsa_region_segment_t segment_id;
|
||||
|
||||
hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, &segment_id);
|
||||
if (segment_id != HSA_REGION_SEGMENT_GLOBAL) {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
AgentInfo* agent_info = (AgentInfo*)data;
|
||||
hsa_region_get_info(region, HSA_REGION_INFO_GLOBAL_FLAGS, &flags);
|
||||
if (flags & HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED) {
|
||||
agent_info->coarse_region = region;
|
||||
}
|
||||
|
||||
if (flags & HSA_REGION_GLOBAL_FLAG_KERNARG) {
|
||||
agent_info->kernarg_region = region;
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
#endif
|
||||
// Constructor of the class
|
||||
HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize_hsa) {
|
||||
hsa_status_t status;
|
||||
|
||||
cpu_pool_ = NULL;
|
||||
kern_arg_pool_ = NULL;
|
||||
|
||||
// Initialize the Hsa Runtime
|
||||
if (initialize_hsa_) {
|
||||
status = hsa_init();
|
||||
CHECK_STATUS("Error in hsa_init", status);
|
||||
}
|
||||
|
||||
// Discover the set of Gpu devices available on the platform
|
||||
status = hsa_iterate_agents(GetHsaAgentsCallback, this);
|
||||
CHECK_STATUS("Error Calling hsa_iterate_agents", status);
|
||||
if (cpu_pool_ == NULL) CHECK_STATUS("CPU memory pool is not found", HSA_STATUS_ERROR);
|
||||
if (kern_arg_pool_ == NULL) CHECK_STATUS("Kern-arg memory pool is not found", HSA_STATUS_ERROR);
|
||||
|
||||
// Get AqlProfile API table
|
||||
aqlprofile_api_ = {0};
|
||||
#ifdef ROCP_LD_AQLPROFILE
|
||||
status = LoadAqlProfileLib(&aqlprofile_api_);
|
||||
#else
|
||||
status = hsa_system_get_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, 1, 0, &aqlprofile_api_);
|
||||
status = hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, hsa_ven_amd_aqlprofile_VERSION_MAJOR, sizeof(aqlprofile_api_), &aqlprofile_api_);
|
||||
#endif
|
||||
CHECK_STATUS("aqlprofile API table load failed", status);
|
||||
|
||||
// Get Loader API table
|
||||
loader_api_ = {0};
|
||||
status = hsa_system_get_extension_table(HSA_EXTENSION_AMD_LOADER, 1, 0, &loader_api_);
|
||||
status = hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_LOADER, 1, sizeof(loader_api_), &loader_api_);
|
||||
CHECK_STATUS("loader API table query failed", status);
|
||||
|
||||
// Instantiate HSA timer
|
||||
timer_ = new HsaTimer;
|
||||
CHECK_STATUS("HSA timer allocation failed",
|
||||
(timer_ == NULL) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS);
|
||||
|
||||
// System timeout
|
||||
timeout_ = (timeout_ns_ == HsaTimer::TIMESTAMP_MAX) ? timeout_ns_ : timer_->ns_to_sysclock(timeout_ns_);
|
||||
}
|
||||
|
||||
// Destructor of the class
|
||||
HsaRsrcFactory::~HsaRsrcFactory() {
|
||||
delete timer_;
|
||||
for (auto p : cpu_list_) delete p;
|
||||
for (auto p : gpu_list_) delete p;
|
||||
if (initialize_hsa_) {
|
||||
@@ -173,39 +163,35 @@ HsaRsrcFactory::~HsaRsrcFactory() {
|
||||
}
|
||||
|
||||
hsa_status_t HsaRsrcFactory::LoadAqlProfileLib(aqlprofile_pfn_t* api) {
|
||||
void* handle = dlopen(kAqlProfileLib, RTLD_NOW);
|
||||
if (handle == NULL) {
|
||||
fprintf(stderr, "Loading '%s' failed, %s\n", kAqlProfileLib, dlerror());
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
dlerror(); /* Clear any existing error */
|
||||
void* handle = dlopen(kAqlProfileLib, RTLD_NOW);
|
||||
if (handle == NULL) {
|
||||
fprintf(stderr, "Loading '%s' failed, %s\n", kAqlProfileLib, dlerror());
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
dlerror(); /* Clear any existing error */
|
||||
|
||||
api->hsa_ven_amd_aqlprofile_error_string =
|
||||
(decltype(::hsa_ven_amd_aqlprofile_error_string)*)
|
||||
dlsym(handle, "hsa_ven_amd_aqlprofile_error_string");
|
||||
api->hsa_ven_amd_aqlprofile_validate_event =
|
||||
(decltype(::hsa_ven_amd_aqlprofile_validate_event)*)
|
||||
dlsym(handle, "hsa_ven_amd_aqlprofile_validate_event");
|
||||
api->hsa_ven_amd_aqlprofile_start =
|
||||
(decltype(::hsa_ven_amd_aqlprofile_start)*)
|
||||
dlsym(handle, "hsa_ven_amd_aqlprofile_start");
|
||||
api->hsa_ven_amd_aqlprofile_stop =
|
||||
(decltype(::hsa_ven_amd_aqlprofile_stop)*)
|
||||
dlsym(handle, "hsa_ven_amd_aqlprofile_stop");
|
||||
#if AQL_PROFILE_READ_API_ENABLE
|
||||
api->hsa_ven_amd_aqlprofile_read =
|
||||
(decltype(::hsa_ven_amd_aqlprofile_read)*)
|
||||
dlsym(handle, "hsa_ven_amd_aqlprofile_read");
|
||||
#endif // AQL_PROFILE_READ_API_ENABLE
|
||||
api->hsa_ven_amd_aqlprofile_legacy_get_pm4 =
|
||||
(decltype(::hsa_ven_amd_aqlprofile_legacy_get_pm4)*)
|
||||
dlsym(handle, "hsa_ven_amd_aqlprofile_legacy_get_pm4");
|
||||
api->hsa_ven_amd_aqlprofile_get_info =
|
||||
(decltype(::hsa_ven_amd_aqlprofile_get_info)*)
|
||||
dlsym(handle, "hsa_ven_amd_aqlprofile_get_info");
|
||||
api->hsa_ven_amd_aqlprofile_iterate_data =
|
||||
(decltype(::hsa_ven_amd_aqlprofile_iterate_data)*)
|
||||
dlsym(handle, "hsa_ven_amd_aqlprofile_iterate_data");
|
||||
api->hsa_ven_amd_aqlprofile_error_string =
|
||||
(decltype(::hsa_ven_amd_aqlprofile_error_string)*)dlsym(
|
||||
handle, "hsa_ven_amd_aqlprofile_error_string");
|
||||
api->hsa_ven_amd_aqlprofile_validate_event =
|
||||
(decltype(::hsa_ven_amd_aqlprofile_validate_event)*)dlsym(
|
||||
handle, "hsa_ven_amd_aqlprofile_validate_event");
|
||||
api->hsa_ven_amd_aqlprofile_start =
|
||||
(decltype(::hsa_ven_amd_aqlprofile_start)*)dlsym(handle, "hsa_ven_amd_aqlprofile_start");
|
||||
api->hsa_ven_amd_aqlprofile_stop =
|
||||
(decltype(::hsa_ven_amd_aqlprofile_stop)*)dlsym(handle, "hsa_ven_amd_aqlprofile_stop");
|
||||
#ifdef AQLPROF_NEW_API
|
||||
api->hsa_ven_amd_aqlprofile_read =
|
||||
(decltype(::hsa_ven_amd_aqlprofile_read)*)dlsym(handle, "hsa_ven_amd_aqlprofile_read");
|
||||
#endif
|
||||
api->hsa_ven_amd_aqlprofile_legacy_get_pm4 =
|
||||
(decltype(::hsa_ven_amd_aqlprofile_legacy_get_pm4)*)dlsym(
|
||||
handle, "hsa_ven_amd_aqlprofile_legacy_get_pm4");
|
||||
api->hsa_ven_amd_aqlprofile_get_info = (decltype(::hsa_ven_amd_aqlprofile_get_info)*)dlsym(
|
||||
handle, "hsa_ven_amd_aqlprofile_get_info");
|
||||
api->hsa_ven_amd_aqlprofile_iterate_data =
|
||||
(decltype(::hsa_ven_amd_aqlprofile_iterate_data)*)dlsym(
|
||||
handle, "hsa_ven_amd_aqlprofile_iterate_data");
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
@@ -227,9 +213,9 @@ const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) {
|
||||
agent_info->dev_index = cpu_list_.size();
|
||||
|
||||
status = hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->cpu_pool);
|
||||
CHECK_ITER_STATUS("hsa_amd_agent_iterate_memory_pools(cpu pool)", status);
|
||||
if ((status == HSA_STATUS_INFO_BREAK) && (cpu_pool_ == NULL)) cpu_pool_ = &agent_info->cpu_pool;
|
||||
status = hsa_amd_agent_iterate_memory_pools(agent, FindKernArgPool, &agent_info->kern_arg_pool);
|
||||
CHECK_ITER_STATUS("hsa_amd_agent_iterate_memory_pools(kern arg pool)", status);
|
||||
if ((status == HSA_STATUS_INFO_BREAK) && (kern_arg_pool_ == NULL)) kern_arg_pool_ = &agent_info->kern_arg_pool;
|
||||
agent_info->gpu_pool = {};
|
||||
|
||||
cpu_list_.push_back(agent_info);
|
||||
@@ -247,23 +233,22 @@ const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) {
|
||||
hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &agent_info->max_queue_size);
|
||||
hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_info->profile);
|
||||
agent_info->is_apu = (agent_info->profile == HSA_PROFILE_FULL) ? true : false;
|
||||
hsa_agent_get_info(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT), &agent_info->cu_num);
|
||||
hsa_agent_get_info(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU), &agent_info->waves_per_cu);
|
||||
hsa_agent_get_info(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU), &agent_info->simds_per_cu);
|
||||
hsa_agent_get_info(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES), &agent_info->se_num);
|
||||
hsa_agent_get_info(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE), &agent_info->shader_arrays_per_se);
|
||||
hsa_agent_get_info(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT),
|
||||
&agent_info->cu_num);
|
||||
hsa_agent_get_info(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU),
|
||||
&agent_info->waves_per_cu);
|
||||
hsa_agent_get_info(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU),
|
||||
&agent_info->simds_per_cu);
|
||||
hsa_agent_get_info(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES),
|
||||
&agent_info->se_num);
|
||||
hsa_agent_get_info(agent,
|
||||
static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE),
|
||||
&agent_info->shader_arrays_per_se);
|
||||
|
||||
agent_info->cpu_pool = {};
|
||||
agent_info->kern_arg_pool = {};
|
||||
status = hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->gpu_pool);
|
||||
CHECK_ITER_STATUS("hsa_amd_agent_iterate_memory_pools(gpu pool)", status);
|
||||
#if 0
|
||||
// Initialize memory regions to zero
|
||||
agent_info->kernarg_region.handle = 0;
|
||||
agent_info->coarse_region.handle = 0;
|
||||
// Find and Bind Memory regions of the Gpu agent
|
||||
hsa_agent_iterate_regions(agent, FindMemRegionsCallback, agent_info);
|
||||
#endif
|
||||
|
||||
// Set GPU index
|
||||
agent_info->dev_index = gpu_list_.size();
|
||||
@@ -377,14 +362,8 @@ uint8_t* HsaRsrcFactory::AllocateLocalMemory(const AgentInfo* agent_info, size_t
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
uint8_t* buffer = NULL;
|
||||
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
|
||||
status = hsa_amd_memory_pool_allocate(agent_info->gpu_pool, size, 0, (void**)&buffer);
|
||||
// Only GPU can access the memory
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
hsa_agent_t agents_list[1] = {agent_info->dev_id};
|
||||
status = hsa_amd_agents_allow_access(1, agents_list, NULL, buffer);
|
||||
}
|
||||
status = hsa_amd_memory_pool_allocate(agent_info->gpu_pool, size, 0, reinterpret_cast<void**>(&buffer));
|
||||
uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
|
||||
printf("AllocateLocalMemory %p\n", ptr);
|
||||
return ptr;
|
||||
}
|
||||
|
||||
@@ -398,16 +377,14 @@ uint8_t* HsaRsrcFactory::AllocateKernArgMemory(const AgentInfo* agent_info, size
|
||||
uint8_t* buffer = NULL;
|
||||
if (!cpu_agents_.empty()) {
|
||||
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
|
||||
status = hsa_amd_memory_pool_allocate(cpu_list_[0]->kern_arg_pool, size, 0, (void**)&buffer);
|
||||
status = hsa_amd_memory_pool_allocate(*kern_arg_pool_, size, 0, reinterpret_cast<void**>(&buffer));
|
||||
// Both the CPU and GPU can access the kernel arguments
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
auto agents_vec = cpu_agents_;
|
||||
agents_vec.push_back(agent_info->dev_id);
|
||||
status = hsa_amd_agents_allow_access(agents_vec.size(), &agents_vec[0], NULL, buffer);
|
||||
hsa_agent_t ag_list[1] = {agent_info->dev_id};
|
||||
status = hsa_amd_agents_allow_access(1, ag_list, NULL, buffer);
|
||||
}
|
||||
}
|
||||
uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
|
||||
printf("AllocateKernargMemory %p\n", ptr);
|
||||
return ptr;
|
||||
}
|
||||
|
||||
@@ -418,41 +395,76 @@ uint8_t* HsaRsrcFactory::AllocateKernArgMemory(const AgentInfo* agent_info, size
|
||||
uint8_t* HsaRsrcFactory::AllocateSysMemory(const AgentInfo* agent_info, size_t size) {
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
uint8_t* buffer = NULL;
|
||||
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
|
||||
if (!cpu_agents_.empty()) {
|
||||
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
|
||||
status = hsa_amd_memory_pool_allocate(cpu_list_[0]->cpu_pool, size, 0, (void**)&buffer);
|
||||
status = hsa_amd_memory_pool_allocate(*cpu_pool_, size, 0, reinterpret_cast<void**>(&buffer));
|
||||
// Both the CPU and GPU can access the memory
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
auto agents_vec = cpu_agents_;
|
||||
agents_vec.push_back(agent_info->dev_id);
|
||||
status = hsa_amd_agents_allow_access(agents_vec.size(), &agents_vec[0], NULL, buffer);
|
||||
hsa_agent_t ag_list[1] = {agent_info->dev_id};
|
||||
status = hsa_amd_agents_allow_access(1, ag_list, NULL, buffer);
|
||||
}
|
||||
}
|
||||
uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
|
||||
printf("AllocateSysMemory %p\n", ptr);
|
||||
return ptr;
|
||||
}
|
||||
|
||||
// Allocate memory for command buffer.
|
||||
// @param agent_info Agent from whose memory region to allocate
|
||||
// @param size Size of memory in terms of bytes
|
||||
// @return uint8_t* Pointer to buffer, null if allocation fails.
|
||||
uint8_t* HsaRsrcFactory::AllocateCmdMemory(const AgentInfo* agent_info, size_t size) {
|
||||
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
|
||||
uint8_t* ptr = (agent_info->is_apu && CMD_MEMORY_MMAP)
|
||||
? reinterpret_cast<uint8_t*>(
|
||||
mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED | MAP_ANONYMOUS, 0, 0))
|
||||
: AllocateSysMemory(agent_info, size);
|
||||
return ptr;
|
||||
}
|
||||
|
||||
// Wait signal
|
||||
void HsaRsrcFactory::SignalWait(const hsa_signal_t& signal) const {
|
||||
while (1) {
|
||||
const hsa_signal_value_t signal_value =
|
||||
hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, 1, timeout_, HSA_WAIT_STATE_BLOCKED);
|
||||
if (signal_value == 0) {
|
||||
break;
|
||||
} else {
|
||||
if (signal_value == 1) { WARN_LOGGING("signal waiting..."); }
|
||||
else { EXC_RAISING(HSA_STATUS_ERROR, "hsa_signal_wait_scacquire (" << signal_value << ")"); }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Wait signal with signal value restore
|
||||
void HsaRsrcFactory::SignalWaitRestore(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const {
|
||||
SignalWait(signal);
|
||||
hsa_signal_store_relaxed(const_cast<hsa_signal_t&>(signal), signal_value);
|
||||
}
|
||||
|
||||
// Copy data from GPU to host memory
|
||||
bool HsaRsrcFactory::CopyToHost(const hsa_agent_t& agent, void* dst, const void* src, size_t size) {
|
||||
bool HsaRsrcFactory::Memcpy(const hsa_agent_t& agent, void* dst, const void* src, size_t size) {
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
if (!cpu_agents_.empty()) {
|
||||
hsa_signal_t s = {};
|
||||
hsa_status_t status = hsa_signal_create(1, 0, NULL, &s);
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
status = hsa_amd_memory_async_copy(dst, cpu_agents_[0], src, agent, size, 0, NULL, s);
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
if (hsa_signal_wait_scacquire(s, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_BLOCKED) != 0) {
|
||||
status = HSA_STATUS_ERROR;
|
||||
}
|
||||
}
|
||||
status = hsa_signal_destroy(s);
|
||||
}
|
||||
status = hsa_signal_create(1, 0, NULL, &s);
|
||||
CHECK_STATUS("hsa_signal_create()", status);
|
||||
status = hsa_amd_memory_async_copy(dst, cpu_agents_[0], src, agent, size, 0, NULL, s);
|
||||
CHECK_STATUS("hsa_amd_memory_async_copy()", status);
|
||||
SignalWait(s);
|
||||
status = hsa_signal_destroy(s);
|
||||
CHECK_STATUS("hsa_signal_destroy()", status);
|
||||
}
|
||||
return (status == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
bool HsaRsrcFactory::CopyToHost(const AgentInfo* agent_info, void* dst, const void* src, size_t size) {
|
||||
return CopyToHost(agent_info->dev_id, dst, src, size);
|
||||
bool HsaRsrcFactory::Memcpy(const AgentInfo* agent_info, void* dst, const void* src, size_t size) {
|
||||
return Memcpy(agent_info->dev_id, dst, src, size);
|
||||
}
|
||||
|
||||
// Memory free method
|
||||
bool HsaRsrcFactory::FreeMemory(void* ptr) {
|
||||
const hsa_status_t status = hsa_memory_free(ptr);
|
||||
CHECK_STATUS("hsa_memory_free", status);
|
||||
return (status == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
// Loads an Assembled Brig file and Finalizes it into Device Isa
|
||||
@@ -463,7 +475,8 @@ bool HsaRsrcFactory::CopyToHost(const AgentInfo* agent_info, void* dst, const vo
|
||||
// be used to submit for execution
|
||||
// @return bool true if successful, false otherwise
|
||||
bool HsaRsrcFactory::LoadAndFinalize(const AgentInfo* agent_info, const char* brig_path,
|
||||
const char* kernel_name, hsa_executable_t* executable, hsa_executable_symbol_t* code_desc) {
|
||||
const char* kernel_name, hsa_executable_t* executable,
|
||||
hsa_executable_symbol_t* code_desc) {
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
|
||||
// Build the code object filename
|
||||
@@ -487,13 +500,13 @@ bool HsaRsrcFactory::LoadAndFinalize(const AgentInfo* agent_info, const char* br
|
||||
}
|
||||
|
||||
// Create executable.
|
||||
status = hsa_executable_create_alt(HSA_PROFILE_FULL,
|
||||
HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, NULL, executable);
|
||||
status = hsa_executable_create_alt(HSA_PROFILE_FULL, HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
|
||||
NULL, executable);
|
||||
CHECK_STATUS("Error in creating executable object", status);
|
||||
|
||||
// Load code object.
|
||||
status = hsa_executable_load_agent_code_object(*executable, agent_info->dev_id,
|
||||
code_obj_rdr, NULL, NULL);
|
||||
status = hsa_executable_load_agent_code_object(*executable, agent_info->dev_id, code_obj_rdr,
|
||||
NULL, NULL);
|
||||
CHECK_STATUS("Error in loading executable object", status);
|
||||
|
||||
// Freeze executable.
|
||||
@@ -513,6 +526,7 @@ bool HsaRsrcFactory::LoadAndFinalize(const AgentInfo* agent_info, const char* br
|
||||
|
||||
// Print the various fields of Hsa Gpu Agents
|
||||
bool HsaRsrcFactory::PrintGpuAgents(const std::string& header) {
|
||||
std::cout << std::flush;
|
||||
std::clog << header << " :" << std::endl;
|
||||
|
||||
const AgentInfo* agent_info;
|
||||
@@ -526,7 +540,6 @@ bool HsaRsrcFactory::PrintGpuAgents(const std::string& header) {
|
||||
std::clog << ">> HSAIL profile : " << agent_info->profile << std::endl;
|
||||
std::clog << ">> Max Wave Size : " << agent_info->max_wave_size << std::endl;
|
||||
std::clog << ">> Max Queue Size : " << agent_info->max_queue_size << std::endl;
|
||||
// std::clog << ">> Kernarg Region Id : " << agent_info->coarse_region.handle << std::endl;
|
||||
std::clog << ">> CU number : " << agent_info->cu_num << std::endl;
|
||||
std::clog << ">> Waves per CU : " << agent_info->waves_per_cu << std::endl;
|
||||
std::clog << ">> SIMDs per CU : " << agent_info->simds_per_cu << std::endl;
|
||||
@@ -536,8 +549,8 @@ bool HsaRsrcFactory::PrintGpuAgents(const std::string& header) {
|
||||
return true;
|
||||
}
|
||||
|
||||
uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, void* packet) {
|
||||
const uint32_t slot_size_b = 0x40;
|
||||
uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet) {
|
||||
const uint32_t slot_size_b = CMD_SLOT_SIZE_B;
|
||||
|
||||
// advance command queue
|
||||
const uint64_t write_idx = hsa_queue_load_write_index_relaxed(queue);
|
||||
@@ -547,14 +560,15 @@ uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, void* packet) {
|
||||
}
|
||||
|
||||
uint32_t slot_idx = (uint32_t)(write_idx % queue->size);
|
||||
uint32_t* queue_slot = (uint32_t*)((uintptr_t)(queue->base_address) + (slot_idx * slot_size_b));
|
||||
uint32_t* slot_data = (uint32_t*)packet;
|
||||
uint32_t* queue_slot = reinterpret_cast<uint32_t*>((uintptr_t)(queue->base_address) + (slot_idx * slot_size_b));
|
||||
const uint32_t* slot_data = reinterpret_cast<const uint32_t*>(packet);
|
||||
|
||||
// Copy buffered commands into the queue slot.
|
||||
// Overwrite the AQL invalid header (first dword) last.
|
||||
// This prevents the slot from being read until it's fully written.
|
||||
memcpy(&queue_slot[1], &slot_data[1], slot_size_b - sizeof(uint32_t));
|
||||
std::atomic<uint32_t>* header_atomic_ptr = reinterpret_cast<std::atomic<uint32_t>*>(&queue_slot[0]);
|
||||
std::atomic<uint32_t>* header_atomic_ptr =
|
||||
reinterpret_cast<std::atomic<uint32_t>*>(&queue_slot[0]);
|
||||
header_atomic_ptr->store(slot_data[0], std::memory_order_release);
|
||||
|
||||
// ring doorbell
|
||||
@@ -563,8 +577,25 @@ uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, void* packet) {
|
||||
return write_idx;
|
||||
}
|
||||
|
||||
// Submit a buffer holding one or more fixed-size packets to the given queue.
// @param queue Queue to submit to
// @param packet Pointer to the first packet in the buffer
// @param size_bytes Buffer size in bytes; must be a multiple of
// CMD_SLOT_SIZE_B, otherwise an error is printed to stderr and the
// process is aborted
// @return uint64_t Value returned by the single-packet Submit() for the last
// packet, or 0 when size_bytes is 0 and nothing was submitted
uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet, size_t size_bytes) {
  const uint32_t packet_bytes = CMD_SLOT_SIZE_B;

  // The mask test assumes the slot size is a power of two.
  const bool whole_packets = ((size_bytes & (packet_bytes - 1)) == 0);
  if (!whole_packets) {
    fprintf(stderr, "HsaRsrcFactory::Submit: Bad packet size %zx\n", size_bytes);
    abort();
  }

  // Walk the buffer one slot at a time, handing each slot to the
  // single-packet overload.
  const char* cursor = reinterpret_cast<const char*>(packet);
  const char* const limit = cursor + size_bytes;
  uint64_t last_write_idx = 0;
  while (cursor < limit) {
    last_write_idx = Submit(queue, cursor);
    cursor += packet_bytes;
  }

  return last_write_idx;
}
|
||||
|
||||
// Storage for the HsaRsrcFactory static data members.
// The factory instance pointer starts out null.
HsaRsrcFactory* HsaRsrcFactory::instance_ = nullptr;
// Mutex object declared as a static member of the factory class
HsaRsrcFactory::mutex_t HsaRsrcFactory::mutex_;
// Timeout in 'ns', initialized to the largest timestamp value
HsaRsrcFactory::timestamp_t HsaRsrcFactory::timeout_ns_ = HsaTimer::TIMESTAMP_MAX;
|
||||
|
||||
} // namespace util
|
||||
} // namespace roctracer
|
||||
|
||||
@@ -1,27 +1,29 @@
|
||||
/*
|
||||
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
|
||||
/**********************************************************************
|
||||
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
provided that the following conditions are met:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
• Redistributions of source code must retain the above copyright notice, this list of
|
||||
conditions and the following disclaimer.
|
||||
• Redistributions in binary form must reproduce the above copyright notice, this list of
|
||||
conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
|
||||
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
|
||||
SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
********************************************************************/
|
||||
|
||||
#ifndef _HSA_RSRC_FACTORY_H_
|
||||
#define _HSA_RSRC_FACTORY_H_
|
||||
#ifndef SRC_UTIL_HSA_RSRC_FACTORY_H_
|
||||
#define SRC_UTIL_HSA_RSRC_FACTORY_H_
|
||||
|
||||
#include <hsa.h>
|
||||
#include <hsa_ext_amd.h>
|
||||
@@ -43,26 +45,27 @@ THE SOFTWARE.
|
||||
#define HSA_QUEUE_ALIGN_BYTES 64
|
||||
#define HSA_PACKET_ALIGN_BYTES 64
|
||||
|
||||
#define CHECK_STATUS(msg, status) \
|
||||
if (status != HSA_STATUS_SUCCESS) { \
|
||||
#define CHECK_STATUS(msg, status) do { \
|
||||
if ((status) != HSA_STATUS_SUCCESS) { \
|
||||
const char* emsg = 0; \
|
||||
hsa_status_string(status, &emsg); \
|
||||
printf("%s: %s\n", msg, emsg ? emsg : "<unknown error>"); \
|
||||
exit(1); \
|
||||
}
|
||||
abort(); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define CHECK_ITER_STATUS(msg, status) \
|
||||
if (status != HSA_STATUS_INFO_BREAK) { \
|
||||
#define CHECK_ITER_STATUS(msg, status) do { \
|
||||
if ((status) != HSA_STATUS_INFO_BREAK) { \
|
||||
const char* emsg = 0; \
|
||||
hsa_status_string(status, &emsg); \
|
||||
printf("%s: %s\n", msg, emsg ? emsg : "<unknown error>"); \
|
||||
exit(1); \
|
||||
}
|
||||
abort(); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
namespace roctracer {
|
||||
namespace util {
|
||||
static const unsigned MEM_PAGE_BYTES = 0x1000;
|
||||
static const unsigned MEM_PAGE_MASK = MEM_PAGE_BYTES - 1;
|
||||
static const size_t MEM_PAGE_BYTES = 0x1000;
|
||||
static const size_t MEM_PAGE_MASK = MEM_PAGE_BYTES - 1;
|
||||
typedef decltype(hsa_agent_t::handle) hsa_agent_handle_t;
|
||||
|
||||
// Encapsulates information about a Hsa Agent such as its
|
||||
@@ -94,13 +97,7 @@ struct AgentInfo {
|
||||
|
||||
// Hsail profile supported by agent
|
||||
hsa_profile_t profile;
|
||||
#if 0
|
||||
// Memory region supporting kernel parameters
|
||||
hsa_region_t coarse_region;
|
||||
|
||||
// Memory region supporting kernel arguments
|
||||
hsa_region_t kernarg_region;
|
||||
#endif
|
||||
// CPU/GPU/kern-arg memory pools
|
||||
hsa_amd_memory_pool_t cpu_pool;
|
||||
hsa_amd_memory_pool_t gpu_pool;
|
||||
@@ -122,9 +119,47 @@ struct AgentInfo {
|
||||
uint32_t shader_arrays_per_se;
|
||||
};
|
||||
|
||||
// HSA timer class
|
||||
// Provides current HSA timestampa and system-clock/ns conversion API
|
||||
class HsaTimer {
|
||||
public:
|
||||
typedef uint64_t timestamp_t;
|
||||
static const timestamp_t TIMESTAMP_MAX = UINT64_MAX;
|
||||
typedef long double freq_t;
|
||||
|
||||
HsaTimer() {
|
||||
timestamp_t sysclock_hz = 0;
|
||||
hsa_status_t status = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz);
|
||||
CHECK_STATUS("hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY)", status);
|
||||
sysclock_factor_ = (freq_t)1000000000 / (freq_t)sysclock_hz;
|
||||
}
|
||||
|
||||
// Methods for system-clock/ns conversion
|
||||
timestamp_t sysclock_to_ns(const timestamp_t& sysclock) const {
|
||||
return timestamp_t((freq_t)sysclock * sysclock_factor_);
|
||||
}
|
||||
timestamp_t ns_to_sysclock(const timestamp_t& time) const {
|
||||
return timestamp_t((freq_t)time / sysclock_factor_);
|
||||
}
|
||||
|
||||
// Return timestamp in 'ns'
|
||||
timestamp_t timestamp_ns() const {
|
||||
timestamp_t sysclock;
|
||||
hsa_status_t status = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, &sysclock);
|
||||
CHECK_STATUS("hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP)", status);
|
||||
return sysclock_to_ns(sysclock);
|
||||
}
|
||||
|
||||
private:
|
||||
// Timestamp frequency factor
|
||||
freq_t sysclock_factor_;
|
||||
};
|
||||
|
||||
class HsaRsrcFactory {
|
||||
public:
|
||||
static const size_t CMD_SLOT_SIZE_B = 0x40;
|
||||
typedef std::recursive_mutex mutex_t;
|
||||
typedef HsaTimer::timestamp_t timestamp_t;
|
||||
|
||||
static HsaRsrcFactory* Create(bool initialize_hsa = true) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
@@ -204,9 +239,24 @@ class HsaRsrcFactory {
|
||||
// @return uint8_t* Pointer to buffer, null if allocation fails.
|
||||
uint8_t* AllocateSysMemory(const AgentInfo* agent_info, size_t size);
|
||||
|
||||
// Allocate memory for command buffer.
|
||||
// @param agent_info Agent from whose memory region to allocate
|
||||
// @param size Size of memory in terms of bytes
|
||||
// @return uint8_t* Pointer to buffer, null if allocation fails.
|
||||
uint8_t* AllocateCmdMemory(const AgentInfo* agent_info, size_t size);
|
||||
|
||||
// Wait signal
|
||||
void SignalWait(const hsa_signal_t& signal) const;
|
||||
|
||||
// Wait signal with signal value restore
|
||||
void SignalWaitRestore(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const;
|
||||
|
||||
// Copy data from GPU to host memory
|
||||
bool CopyToHost(const hsa_agent_t& agent, void* dst, const void* src, size_t size);
|
||||
bool CopyToHost(const AgentInfo* agent_info, void* dst, const void* src, size_t size);
|
||||
bool Memcpy(const hsa_agent_t& agent, void* dst, const void* src, size_t size);
|
||||
bool Memcpy(const AgentInfo* agent_info, void* dst, const void* src, size_t size);
|
||||
|
||||
// Memory free method
|
||||
static bool FreeMemory(void* ptr);
|
||||
|
||||
// Loads an Assembled Brig file and Finalizes it into Device Isa
|
||||
// @param agent_info Gpu device for which to finalize
|
||||
@@ -216,21 +266,35 @@ class HsaRsrcFactory {
|
||||
// be used to submit for execution
|
||||
// @return true if successful, false otherwise
|
||||
bool LoadAndFinalize(const AgentInfo* agent_info, const char* brig_path, const char* kernel_name,
|
||||
hsa_executable_t* hsa_exec, hsa_executable_symbol_t* code_desc);
|
||||
hsa_executable_t* hsa_exec, hsa_executable_symbol_t* code_desc);
|
||||
|
||||
// Print the various fields of Hsa Gpu Agents
|
||||
bool PrintGpuAgents(const std::string& header);
|
||||
|
||||
// Submit AQL packet to given queue
|
||||
static uint64_t Submit(hsa_queue_t* queue, void* packet);
|
||||
static uint64_t Submit(hsa_queue_t* queue, const void* packet);
|
||||
static uint64_t Submit(hsa_queue_t* queue, const void* packet, size_t size_bytes);
|
||||
|
||||
// Return AqlProfile API table
|
||||
typedef hsa_ven_amd_aqlprofile_1_00_pfn_t aqlprofile_pfn_t;
|
||||
typedef hsa_ven_amd_aqlprofile_pfn_t aqlprofile_pfn_t;
|
||||
const aqlprofile_pfn_t* AqlProfileApi() const { return &aqlprofile_api_; }
|
||||
|
||||
// Return Loader API table
|
||||
const hsa_ven_amd_loader_1_00_pfn_t* LoaderApi() const { return &loader_api_; }
|
||||
|
||||
// Methods for system-clock/ns conversion and timestamp in 'ns'
|
||||
timestamp_t SysclockToNs(const timestamp_t& sysclock) const { return timer_->sysclock_to_ns(sysclock); }
|
||||
timestamp_t NsToSysclock(const timestamp_t& time) const { return timer_->ns_to_sysclock(time); }
|
||||
timestamp_t TimestampNs() const { return timer_->timestamp_ns(); }
|
||||
|
||||
timestamp_t GetSysTimeout() const { return timeout_; }
|
||||
static timestamp_t GetTimeoutNs() { return timeout_ns_; }
|
||||
static void SetTimeoutNs(const timestamp_t& time) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
timeout_ns_ = time;
|
||||
if (instance_ != NULL) instance_->timeout_ = instance_->timer_->ns_to_sysclock(time);
|
||||
}
|
||||
|
||||
private:
|
||||
// System agents iterating callback
|
||||
static hsa_status_t GetHsaAgentsCallback(hsa_agent_t agent, void* data);
|
||||
@@ -243,17 +307,20 @@ class HsaRsrcFactory {
|
||||
|
||||
// Constructor of the class. Will initialize the Hsa Runtime and
|
||||
// query the system topology to get the list of Cpu and Gpu devices
|
||||
HsaRsrcFactory(bool initialize_hsa);
|
||||
explicit HsaRsrcFactory(bool initialize_hsa);
|
||||
|
||||
// Destructor of the class
|
||||
~HsaRsrcFactory();
|
||||
|
||||
// HSA was initialized
|
||||
const bool initialize_hsa_;
|
||||
|
||||
// Add an instance of AgentInfo representing a Hsa Gpu agent
|
||||
const AgentInfo* AddAgentInfo(const hsa_agent_t agent);
|
||||
|
||||
// To mmap command buffer memory
|
||||
static const bool CMD_MEMORY_MMAP = false;
|
||||
|
||||
// HSA was initialized
|
||||
const bool initialize_hsa_;
|
||||
|
||||
static HsaRsrcFactory* instance_;
|
||||
static mutex_t mutex_;
|
||||
|
||||
@@ -273,9 +340,20 @@ class HsaRsrcFactory {
|
||||
|
||||
// Loader API table
|
||||
hsa_ven_amd_loader_1_00_pfn_t loader_api_;
|
||||
|
||||
// System timeout, ns
|
||||
static timestamp_t timeout_ns_;
|
||||
// System timeout, sysclock
|
||||
timestamp_t timeout_;
|
||||
|
||||
// HSA timer
|
||||
HsaTimer* timer_;
|
||||
|
||||
// CPU/kern-arg memory pools
|
||||
hsa_amd_memory_pool_t *cpu_pool_;
|
||||
hsa_amd_memory_pool_t *kern_arg_pool_;
|
||||
};
|
||||
|
||||
} // namespace util
|
||||
} // namespace roctracer
|
||||
|
||||
#endif // _HSA_RSRC_FACTORY_H_
|
||||
#endif // SRC_UTIL_HSA_RSRC_FACTORY_H_
|
||||
|
||||
@@ -306,6 +306,7 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
} else {
|
||||
ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HSA_API, hsa_api_callback, NULL));
|
||||
}
|
||||
ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HSA_API));
|
||||
printf(")\n");
|
||||
}
|
||||
|
||||
|
||||
In neuem Issue referenzieren
Einen Benutzer sperren