diff --git a/src/core/roctracer.cpp b/src/core/roctracer.cpp index a2747e34b4..da07bbd56e 100644 --- a/src/core/roctracer.cpp +++ b/src/core/roctracer.cpp @@ -34,6 +34,7 @@ THE SOFTWARE. #include #include "core/loader.h" +#include "proxy/tracker.h" #include "ext/hsa_rt_utils.hpp" #include "util/exception.h" #include "util/hsa_rsrc_factory.h" @@ -84,6 +85,21 @@ THE SOFTWARE. // Internal library methods // namespace roctracer { +decltype(hsa_amd_memory_async_copy)* hsa_amd_memory_async_copy_fn; +decltype(hsa_amd_memory_async_copy_rect)* hsa_amd_memory_async_copy_rect_fn; + +namespace hsa_support { +// callbacks table +cb_table_t cb_table; +// activity enabled +bool enabled = false;; +// Table of function pointers to HSA Core Runtime +CoreApiTable CoreApiTable_saved{}; +// Table of function pointers to AMD extensions +AmdExtTable AmdExtTable_saved{}; +// Table of function pointers to HSA Image Extension +ImageExtTable ImageExtTable_saved{}; +} roctracer_status_t GetExcStatus(const std::exception& e) { const util::exception* roctracer_exc_ptr = dynamic_cast(&e); @@ -276,7 +292,7 @@ CONSTRUCTOR_API void constructor() { } DESTRUCTOR_API void destructor() { - util::HsaRsrcFactory::Destroy(); + ::util::HsaRsrcFactory::Destroy(); util::Logger::Destroy(); } @@ -350,6 +366,65 @@ void HCC_AsyncActivityCallback(uint32_t op_id, void* record, void* arg) { pool->Write(*record_ptr); } +bool hsa_async_copy_handler(hsa_signal_value_t value, void* arg) { + ::proxy::Tracker::entry_t* entry = reinterpret_cast<::proxy::Tracker::entry_t*>(arg); + printf("%lu:%lu async-copy%lu\n", entry->record->begin, entry->record->end, entry->index); + return false; +} + +hsa_status_t hsa_amd_memory_async_copy_interceptor( + void* dst, hsa_agent_t dst_agent, const void* src, + hsa_agent_t src_agent, size_t size, uint32_t num_dep_signals, + const hsa_signal_t* dep_signals, hsa_signal_t completion_signal) +{ + hsa_status_t status = HSA_STATUS_SUCCESS; + if (hsa_support::enabled) { + ::proxy::Tracker* tracker = &::proxy::Tracker::Instance(); + ::proxy::Tracker::entry_t* tracker_entry = tracker->Alloc(hsa_agent_t{}, completion_signal); + status = hsa_amd_memory_async_copy_fn(dst, dst_agent, src, + src_agent, size, num_dep_signals, + dep_signals, tracker_entry->signal); + if (status == HSA_STATUS_SUCCESS) { + tracker->EnableMemcopy(tracker_entry, hsa_async_copy_handler, reinterpret_cast(tracker_entry)); + } else { + tracker->Delete(tracker_entry); + } + } else { + status = hsa_amd_memory_async_copy_fn(dst, dst_agent, src, + src_agent, size, num_dep_signals, + dep_signals, completion_signal); + } + return status; +} + +hsa_status_t hsa_amd_memory_async_copy_rect_interceptor( + const hsa_pitched_ptr_t* dst, const hsa_dim3_t* dst_offset, const hsa_pitched_ptr_t* src, + const hsa_dim3_t* src_offset, const hsa_dim3_t* range, hsa_agent_t copy_agent, + hsa_amd_copy_direction_t dir, uint32_t num_dep_signals, const hsa_signal_t* dep_signals, + hsa_signal_t completion_signal) +{ + hsa_status_t status = HSA_STATUS_SUCCESS; + if (hsa_support::enabled) { + ::proxy::Tracker* tracker = &::proxy::Tracker::Instance(); + ::proxy::Tracker::entry_t* tracker_entry = tracker->Alloc(hsa_agent_t{}, completion_signal); + status = hsa_amd_memory_async_copy_rect_fn(dst, dst_offset, src, + src_offset, range, copy_agent, + dir, num_dep_signals, dep_signals, + tracker_entry->signal); + if (status == HSA_STATUS_SUCCESS) { + tracker->EnableMemcopy(tracker_entry, hsa_async_copy_handler, reinterpret_cast(tracker_entry)); + } else { + tracker->Delete(tracker_entry); + } + } else { + status = hsa_amd_memory_async_copy_rect_fn(dst, dst_offset, src, + src_offset, range, copy_agent, + dir, num_dep_signals, dep_signals, + completion_signal); + } + return status; +} + util::Logger::mutex_t util::Logger::mutex_; util::Logger* util::Logger::instance_ = NULL; MemoryPool* memory_pool = NULL; @@ -359,18 +434,11 @@ memory_pool_mutex_t memory_pool_mutex; Loader::mutex_t Loader::mutex_; HipLoader* HipLoader::instance_; HccLoader* HccLoader::instance_; +} -namespace hsa_support { -// callbacks table -cb_table_t cb_table; -// Table of function pointers to HSA Core Runtime -CoreApiTable CoreApiTable_saved{}; -// Table of function pointers to AMD extensions -AmdExtTable AmdExtTable_saved{}; -// Table of function pointers to HSA Image Extension -ImageExtTable ImageExtTable_saved{}; -} -} +proxy::Tracker* proxy::Tracker::instance_ = NULL; +proxy::Tracker::mutex_t proxy::Tracker::glob_mutex_; +proxy::Tracker::counter_t proxy::Tracker::counter_ = 0; /////////////////////////////////////////////////////////////////////////////////////////////////// // Public library methods @@ -507,9 +575,7 @@ static void roctracer_disable_callback_impl( uint32_t op) { switch (domain) { - case ACTIVITY_DOMAIN_HSA_API: { - break; - } + case ACTIVITY_DOMAIN_HSA_API: break; case ACTIVITY_DOMAIN_HCC_OPS: break; case ACTIVITY_DOMAIN_HIP_API: { hipError_t hip_err = roctracer::HipLoader::Instance().RemoveApiCallback(op); @@ -593,7 +659,10 @@ static void roctracer_enable_activity_impl( { if (pool == NULL) pool = roctracer_default_pool(); switch (domain) { - case ACTIVITY_DOMAIN_HSA_API: break; + case ACTIVITY_DOMAIN_HSA_API: { + roctracer::hsa_support::enabled = true; + break; + } case ACTIVITY_DOMAIN_HCC_OPS: { if (roctracer::HccLoader::GetRef() == NULL) { roctracer::HccLoader::Instance().InitActivityCallback((void*)roctracer::HCC_ActivityIdCallback, @@ -651,7 +720,10 @@ static void roctracer_disable_activity_impl( uint32_t op) { switch (domain) { - case ACTIVITY_DOMAIN_HSA_API: break; + case ACTIVITY_DOMAIN_HSA_API: { + roctracer::hsa_support::enabled = false; + break; + } case ACTIVITY_DOMAIN_HCC_OPS: { const bool succ = roctracer::HccLoader::Instance().EnableActivityCallback(op, false); if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HCC_OPS_ERR, "HCC::EnableActivityCallback(NULL) error domain(" << domain << ") op(" << op << ")"); @@ -731,6 +803,14 @@ PUBLIC_API roctracer_status_t roctracer_set_properties( PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, const char* const* failed_tool_names) { roctracer_set_properties(ACTIVITY_DOMAIN_HSA_API, (void*)table); + + hsa_status_t status = hsa_amd_profiling_async_copy_enable(true); + if (status != HSA_STATUS_SUCCESS) EXC_ABORT(status, "hsa_amd_profiling_async_copy_enable"); + roctracer::hsa_amd_memory_async_copy_fn = table->amd_ext_->hsa_amd_memory_async_copy_fn; + roctracer::hsa_amd_memory_async_copy_rect_fn = table->amd_ext_->hsa_amd_memory_async_copy_rect_fn; + table->amd_ext_->hsa_amd_memory_async_copy_fn = roctracer::hsa_amd_memory_async_copy_interceptor; + table->amd_ext_->hsa_amd_memory_async_copy_rect_fn = roctracer::hsa_amd_memory_async_copy_rect_interceptor; + return true; } diff --git a/src/util/hsa_rsrc_factory.cpp b/src/util/hsa_rsrc_factory.cpp index 6eba3c19d6..1d0f6c6d4f 100644 --- a/src/util/hsa_rsrc_factory.cpp +++ b/src/util/hsa_rsrc_factory.cpp @@ -1,24 +1,26 @@ -/* -Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. +/********************************************************************** +Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: +Redistribution and use in source and binary forms, with or without modification, are permitted +provided that the following conditions are met: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. +<95> Redistributions of source code must retain the above copyright notice, this list of +conditions and the following disclaimer. +<95> Redistributions in binary form must reproduce the above copyright notice, this list of +conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +********************************************************************/ #include "util/hsa_rsrc_factory.h" @@ -31,6 +33,7 @@ THE SOFTWARE. #include #include #include +#include #include #include @@ -41,11 +44,9 @@ THE SOFTWARE. #include #include -#ifndef AQL_PROFILE_READ_API_ENABLE -#define AQL_PROFILE_READ_API_ENABLE 0 -#endif +#include "util/exception.h" +#include "util/logger.h" -namespace roctracer { namespace util { // Callback function to get available in the system agents @@ -66,8 +67,7 @@ hsa_status_t HsaRsrcFactory::GetHsaAgentsCallback(hsa_agent_t agent, void* data) // returned. HSA_STATUS_SUCCESS is returned if no errors were encountered, but // no pool was found meeting the requirements. If an error is encountered, we // return that error. -static hsa_status_t -FindGlobalPool(hsa_amd_memory_pool_t pool, void* data, bool kern_arg) { +static hsa_status_t FindGlobalPool(hsa_amd_memory_pool_t pool, void* data, bool kern_arg) { hsa_status_t err; hsa_amd_segment_t segment; uint32_t flag; @@ -76,21 +76,18 @@ FindGlobalPool(hsa_amd_memory_pool_t pool, void* data, bool kern_arg) { return HSA_STATUS_ERROR_INVALID_ARGUMENT; } - err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, - &segment); + err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment); CHECK_STATUS("hsa_amd_memory_pool_get_info", err); if (HSA_AMD_SEGMENT_GLOBAL != segment) { return HSA_STATUS_SUCCESS; } - err = hsa_amd_memory_pool_get_info(pool, - HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag); + err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag); CHECK_STATUS("hsa_amd_memory_pool_get_info", err); uint32_t karg_st = flag & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT; - if ((karg_st == 0 && kern_arg) || - (karg_st != 0 && !kern_arg)) { + if ((karg_st == 0 && kern_arg) || (karg_st != 0 && !kern_arg)) { return HSA_STATUS_SUCCESS; } @@ -111,59 +108,52 @@ hsa_status_t FindStandardPool(hsa_amd_memory_pool_t pool, void* data) { hsa_status_t FindKernArgPool(hsa_amd_memory_pool_t pool, void* data) { return FindGlobalPool(pool, data, true); } -#if 0 -// Callback function to find and bind kernarg region of an agent -hsa_status_t HsaRsrcFactory::FindMemRegionsCallback(hsa_region_t region, void* data) { - hsa_region_global_flag_t flags; - hsa_region_segment_t segment_id; - hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, &segment_id); - if (segment_id != HSA_REGION_SEGMENT_GLOBAL) { - return HSA_STATUS_SUCCESS; - } - - AgentInfo* agent_info = (AgentInfo*)data; - hsa_region_get_info(region, HSA_REGION_INFO_GLOBAL_FLAGS, &flags); - if (flags & HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED) { - agent_info->coarse_region = region; - } - - if (flags & HSA_REGION_GLOBAL_FLAG_KERNARG) { - agent_info->kernarg_region = region; - } - - return HSA_STATUS_SUCCESS; -} -#endif // Constructor of the class HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize_hsa) { hsa_status_t status; + + cpu_pool_ = NULL; + kern_arg_pool_ = NULL; + // Initialize the Hsa Runtime if (initialize_hsa_) { status = hsa_init(); CHECK_STATUS("Error in hsa_init", status); } + // Discover the set of Gpu devices available on the platform status = hsa_iterate_agents(GetHsaAgentsCallback, this); CHECK_STATUS("Error Calling hsa_iterate_agents", status); + if (cpu_pool_ == NULL) CHECK_STATUS("CPU memory pool is not found", HSA_STATUS_ERROR); + if (kern_arg_pool_ == NULL) CHECK_STATUS("Kern-arg memory pool is not found", HSA_STATUS_ERROR); // Get AqlProfile API table aqlprofile_api_ = {0}; #ifdef ROCP_LD_AQLPROFILE status = LoadAqlProfileLib(&aqlprofile_api_); #else - status = hsa_system_get_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, 1, 0, &aqlprofile_api_); + status = hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, hsa_ven_amd_aqlprofile_VERSION_MAJOR, sizeof(aqlprofile_api_), &aqlprofile_api_); #endif CHECK_STATUS("aqlprofile API table load failed", status); // Get Loader API table loader_api_ = {0}; - status = hsa_system_get_extension_table(HSA_EXTENSION_AMD_LOADER, 1, 0, &loader_api_); + status = hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_LOADER, 1, sizeof(loader_api_), &loader_api_); CHECK_STATUS("loader API table query failed", status); + + // Instantiate HSA timer + timer_ = new HsaTimer; + CHECK_STATUS("HSA timer allocation failed", + (timer_ == NULL) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS); + + // System timeout + timeout_ = (timeout_ns_ == HsaTimer::TIMESTAMP_MAX) ? timeout_ns_ : timer_->ns_to_sysclock(timeout_ns_); } // Destructor of the class HsaRsrcFactory::~HsaRsrcFactory() { + delete timer_; for (auto p : cpu_list_) delete p; for (auto p : gpu_list_) delete p; if (initialize_hsa_) { @@ -173,39 +163,35 @@ HsaRsrcFactory::~HsaRsrcFactory() { } hsa_status_t HsaRsrcFactory::LoadAqlProfileLib(aqlprofile_pfn_t* api) { - void* handle = dlopen(kAqlProfileLib, RTLD_NOW); - if (handle == NULL) { - fprintf(stderr, "Loading '%s' failed, %s\n", kAqlProfileLib, dlerror()); - return HSA_STATUS_ERROR; - } - dlerror(); /* Clear any existing error */ + void* handle = dlopen(kAqlProfileLib, RTLD_NOW); + if (handle == NULL) { + fprintf(stderr, "Loading '%s' failed, %s\n", kAqlProfileLib, dlerror()); + return HSA_STATUS_ERROR; + } + dlerror(); /* Clear any existing error */ - api->hsa_ven_amd_aqlprofile_error_string = - (decltype(::hsa_ven_amd_aqlprofile_error_string)*) - dlsym(handle, "hsa_ven_amd_aqlprofile_error_string"); - api->hsa_ven_amd_aqlprofile_validate_event = - (decltype(::hsa_ven_amd_aqlprofile_validate_event)*) - dlsym(handle, "hsa_ven_amd_aqlprofile_validate_event"); - api->hsa_ven_amd_aqlprofile_start = - (decltype(::hsa_ven_amd_aqlprofile_start)*) - dlsym(handle, "hsa_ven_amd_aqlprofile_start"); - api->hsa_ven_amd_aqlprofile_stop = - (decltype(::hsa_ven_amd_aqlprofile_stop)*) - dlsym(handle, "hsa_ven_amd_aqlprofile_stop"); -#if AQL_PROFILE_READ_API_ENABLE - api->hsa_ven_amd_aqlprofile_read = - (decltype(::hsa_ven_amd_aqlprofile_read)*) - dlsym(handle, "hsa_ven_amd_aqlprofile_read"); -#endif // AQL_PROFILE_READ_API_ENABLE - api->hsa_ven_amd_aqlprofile_legacy_get_pm4 = - (decltype(::hsa_ven_amd_aqlprofile_legacy_get_pm4)*) - dlsym(handle, "hsa_ven_amd_aqlprofile_legacy_get_pm4"); - api->hsa_ven_amd_aqlprofile_get_info = - (decltype(::hsa_ven_amd_aqlprofile_get_info)*) - dlsym(handle, "hsa_ven_amd_aqlprofile_get_info"); - api->hsa_ven_amd_aqlprofile_iterate_data = - (decltype(::hsa_ven_amd_aqlprofile_iterate_data)*) - dlsym(handle, "hsa_ven_amd_aqlprofile_iterate_data"); + api->hsa_ven_amd_aqlprofile_error_string = + (decltype(::hsa_ven_amd_aqlprofile_error_string)*)dlsym( + handle, "hsa_ven_amd_aqlprofile_error_string"); + api->hsa_ven_amd_aqlprofile_validate_event = + (decltype(::hsa_ven_amd_aqlprofile_validate_event)*)dlsym( + handle, "hsa_ven_amd_aqlprofile_validate_event"); + api->hsa_ven_amd_aqlprofile_start = + (decltype(::hsa_ven_amd_aqlprofile_start)*)dlsym(handle, "hsa_ven_amd_aqlprofile_start"); + api->hsa_ven_amd_aqlprofile_stop = + (decltype(::hsa_ven_amd_aqlprofile_stop)*)dlsym(handle, "hsa_ven_amd_aqlprofile_stop"); +#ifdef AQLPROF_NEW_API + api->hsa_ven_amd_aqlprofile_read = + (decltype(::hsa_ven_amd_aqlprofile_read)*)dlsym(handle, "hsa_ven_amd_aqlprofile_read"); +#endif + api->hsa_ven_amd_aqlprofile_legacy_get_pm4 = + (decltype(::hsa_ven_amd_aqlprofile_legacy_get_pm4)*)dlsym( + handle, "hsa_ven_amd_aqlprofile_legacy_get_pm4"); + api->hsa_ven_amd_aqlprofile_get_info = (decltype(::hsa_ven_amd_aqlprofile_get_info)*)dlsym( + handle, "hsa_ven_amd_aqlprofile_get_info"); + api->hsa_ven_amd_aqlprofile_iterate_data = + (decltype(::hsa_ven_amd_aqlprofile_iterate_data)*)dlsym( + handle, "hsa_ven_amd_aqlprofile_iterate_data"); return HSA_STATUS_SUCCESS; } @@ -227,9 +213,9 @@ const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) { agent_info->dev_index = cpu_list_.size(); status = hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->cpu_pool); - CHECK_ITER_STATUS("hsa_amd_agent_iterate_memory_pools(cpu pool)", status); + if ((status == HSA_STATUS_INFO_BREAK) && (cpu_pool_ == NULL)) cpu_pool_ = &agent_info->cpu_pool; status = hsa_amd_agent_iterate_memory_pools(agent, FindKernArgPool, &agent_info->kern_arg_pool); - CHECK_ITER_STATUS("hsa_amd_agent_iterate_memory_pools(kern arg pool)", status); + if ((status == HSA_STATUS_INFO_BREAK) && (kern_arg_pool_ == NULL)) kern_arg_pool_ = &agent_info->kern_arg_pool; agent_info->gpu_pool = {}; cpu_list_.push_back(agent_info); @@ -247,23 +233,22 @@ const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) { hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &agent_info->max_queue_size); hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_info->profile); agent_info->is_apu = (agent_info->profile == HSA_PROFILE_FULL) ? true : false; - hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT), &agent_info->cu_num); - hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU), &agent_info->waves_per_cu); - hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU), &agent_info->simds_per_cu); - hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES), &agent_info->se_num); - hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE), &agent_info->shader_arrays_per_se); + hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT), + &agent_info->cu_num); + hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU), + &agent_info->waves_per_cu); + hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU), + &agent_info->simds_per_cu); + hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES), + &agent_info->se_num); + hsa_agent_get_info(agent, + static_cast(HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE), + &agent_info->shader_arrays_per_se); agent_info->cpu_pool = {}; agent_info->kern_arg_pool = {}; status = hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->gpu_pool); CHECK_ITER_STATUS("hsa_amd_agent_iterate_memory_pools(gpu pool)", status); -#if 0 - // Initialize memory regions to zero - agent_info->kernarg_region.handle = 0; - agent_info->coarse_region.handle = 0; - // Find and Bind Memory regions of the Gpu agent - hsa_agent_iterate_regions(agent, FindMemRegionsCallback, agent_info); -#endif // Set GPU index agent_info->dev_index = gpu_list_.size(); @@ -377,14 +362,8 @@ uint8_t* HsaRsrcFactory::AllocateLocalMemory(const AgentInfo* agent_info, size_t hsa_status_t status = HSA_STATUS_ERROR; uint8_t* buffer = NULL; size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; - status = hsa_amd_memory_pool_allocate(agent_info->gpu_pool, size, 0, (void**)&buffer); - // Only GPU can access the memory - if (status == HSA_STATUS_SUCCESS) { - hsa_agent_t agents_list[1] = {agent_info->dev_id}; - status = hsa_amd_agents_allow_access(1, agents_list, NULL, buffer); - } + status = hsa_amd_memory_pool_allocate(agent_info->gpu_pool, size, 0, reinterpret_cast(&buffer)); uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL; - printf("AllocateLocalMemory %p\n", ptr); return ptr; } @@ -398,16 +377,14 @@ uint8_t* HsaRsrcFactory::AllocateKernArgMemory(const AgentInfo* agent_info, size uint8_t* buffer = NULL; if (!cpu_agents_.empty()) { size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; - status = hsa_amd_memory_pool_allocate(cpu_list_[0]->kern_arg_pool, size, 0, (void**)&buffer); + status = hsa_amd_memory_pool_allocate(*kern_arg_pool_, size, 0, reinterpret_cast(&buffer)); // Both the CPU and GPU can access the kernel arguments if (status == HSA_STATUS_SUCCESS) { - auto agents_vec = cpu_agents_; - agents_vec.push_back(agent_info->dev_id); - status = hsa_amd_agents_allow_access(agents_vec.size(), &agents_vec[0], NULL, buffer); + hsa_agent_t ag_list[1] = {agent_info->dev_id}; + status = hsa_amd_agents_allow_access(1, ag_list, NULL, buffer); } } uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL; - printf("AllocateKernargMemory %p\n", ptr); return ptr; } @@ -418,41 +395,76 @@ uint8_t* HsaRsrcFactory::AllocateKernArgMemory(const AgentInfo* agent_info, size uint8_t* HsaRsrcFactory::AllocateSysMemory(const AgentInfo* agent_info, size_t size) { hsa_status_t status = HSA_STATUS_ERROR; uint8_t* buffer = NULL; + size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; if (!cpu_agents_.empty()) { - size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; - status = hsa_amd_memory_pool_allocate(cpu_list_[0]->cpu_pool, size, 0, (void**)&buffer); + status = hsa_amd_memory_pool_allocate(*cpu_pool_, size, 0, reinterpret_cast(&buffer)); // Both the CPU and GPU can access the memory if (status == HSA_STATUS_SUCCESS) { - auto agents_vec = cpu_agents_; - agents_vec.push_back(agent_info->dev_id); - status = hsa_amd_agents_allow_access(agents_vec.size(), &agents_vec[0], NULL, buffer); + hsa_agent_t ag_list[1] = {agent_info->dev_id}; + status = hsa_amd_agents_allow_access(1, ag_list, NULL, buffer); } } uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL; - printf("AllocateSysMemory %p\n", ptr); return ptr; } +// Allocate memory for command buffer. +// @param agent_info Agent from whose memory region to allocate +// @param size Size of memory in terms of bytes +// @return uint8_t* Pointer to buffer, null if allocation fails. +uint8_t* HsaRsrcFactory::AllocateCmdMemory(const AgentInfo* agent_info, size_t size) { + size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; + uint8_t* ptr = (agent_info->is_apu && CMD_MEMORY_MMAP) + ? reinterpret_cast( + mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED | MAP_ANONYMOUS, 0, 0)) + : AllocateSysMemory(agent_info, size); + return ptr; +} + +// Wait signal +void HsaRsrcFactory::SignalWait(const hsa_signal_t& signal) const { + while (1) { + const hsa_signal_value_t signal_value = + hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, 1, timeout_, HSA_WAIT_STATE_BLOCKED); + if (signal_value == 0) { + break; + } else { + if (signal_value == 1) { WARN_LOGGING("signal waiting..."); } + else { EXC_RAISING(HSA_STATUS_ERROR, "hsa_signal_wait_scacquire (" << signal_value << ")"); } + } + } +} + +// Wait signal with signal value restore +void HsaRsrcFactory::SignalWaitRestore(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const { + SignalWait(signal); + hsa_signal_store_relaxed(const_cast(signal), signal_value); +} + // Copy data from GPU to host memory -bool HsaRsrcFactory::CopyToHost(const hsa_agent_t& agent, void* dst, const void* src, size_t size) { +bool HsaRsrcFactory::Memcpy(const hsa_agent_t& agent, void* dst, const void* src, size_t size) { hsa_status_t status = HSA_STATUS_ERROR; if (!cpu_agents_.empty()) { hsa_signal_t s = {}; - hsa_status_t status = hsa_signal_create(1, 0, NULL, &s); - if (status == HSA_STATUS_SUCCESS) { - status = hsa_amd_memory_async_copy(dst, cpu_agents_[0], src, agent, size, 0, NULL, s); - if (status == HSA_STATUS_SUCCESS) { - if (hsa_signal_wait_scacquire(s, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_BLOCKED) != 0) { - status = HSA_STATUS_ERROR; - } - } - status = hsa_signal_destroy(s); - } + status = hsa_signal_create(1, 0, NULL, &s); + CHECK_STATUS("hsa_signal_create()", status); + status = hsa_amd_memory_async_copy(dst, cpu_agents_[0], src, agent, size, 0, NULL, s); + CHECK_STATUS("hsa_amd_memory_async_copy()", status); + SignalWait(s); + status = hsa_signal_destroy(s); + CHECK_STATUS("hsa_signal_destroy()", status); } return (status == HSA_STATUS_SUCCESS); } -bool HsaRsrcFactory::CopyToHost(const AgentInfo* agent_info, void* dst, const void* src, size_t size) { - return CopyToHost(agent_info->dev_id, dst, src, size); +bool HsaRsrcFactory::Memcpy(const AgentInfo* agent_info, void* dst, const void* src, size_t size) { + return Memcpy(agent_info->dev_id, dst, src, size); +} + +// Memory free method +bool HsaRsrcFactory::FreeMemory(void* ptr) { + const hsa_status_t status = hsa_memory_free(ptr); + CHECK_STATUS("hsa_memory_free", status); + return (status == HSA_STATUS_SUCCESS); } // Loads an Assembled Brig file and Finalizes it into Device Isa @@ -463,7 +475,8 @@ bool HsaRsrcFactory::CopyToHost(const AgentInfo* agent_info, void* dst, const vo // be used to submit for execution // @return bool true if successful, false otherwise bool HsaRsrcFactory::LoadAndFinalize(const AgentInfo* agent_info, const char* brig_path, - const char* kernel_name, hsa_executable_t* executable, hsa_executable_symbol_t* code_desc) { + const char* kernel_name, hsa_executable_t* executable, + hsa_executable_symbol_t* code_desc) { hsa_status_t status = HSA_STATUS_ERROR; // Build the code object filename @@ -487,13 +500,13 @@ bool HsaRsrcFactory::LoadAndFinalize(const AgentInfo* agent_info, const char* br } // Create executable. - status = hsa_executable_create_alt(HSA_PROFILE_FULL, - HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, NULL, executable); + status = hsa_executable_create_alt(HSA_PROFILE_FULL, HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, + NULL, executable); CHECK_STATUS("Error in creating executable object", status); // Load code object. - status = hsa_executable_load_agent_code_object(*executable, agent_info->dev_id, - code_obj_rdr, NULL, NULL); + status = hsa_executable_load_agent_code_object(*executable, agent_info->dev_id, code_obj_rdr, + NULL, NULL); CHECK_STATUS("Error in loading executable object", status); // Freeze executable. @@ -513,6 +526,7 @@ bool HsaRsrcFactory::LoadAndFinalize(const AgentInfo* agent_info, const char* br // Print the various fields of Hsa Gpu Agents bool HsaRsrcFactory::PrintGpuAgents(const std::string& header) { + std::cout << std::flush; std::clog << header << " :" << std::endl; const AgentInfo* agent_info; @@ -526,7 +540,6 @@ bool HsaRsrcFactory::PrintGpuAgents(const std::string& header) { std::clog << ">> HSAIL profile : " << agent_info->profile << std::endl; std::clog << ">> Max Wave Size : " << agent_info->max_wave_size << std::endl; std::clog << ">> Max Queue Size : " << agent_info->max_queue_size << std::endl; -// std::clog << ">> Kernarg Region Id : " << agent_info->coarse_region.handle << std::endl; std::clog << ">> CU number : " << agent_info->cu_num << std::endl; std::clog << ">> Waves per CU : " << agent_info->waves_per_cu << std::endl; std::clog << ">> SIMDs per CU : " << agent_info->simds_per_cu << std::endl; @@ -536,8 +549,8 @@ bool HsaRsrcFactory::PrintGpuAgents(const std::string& header) { return true; } -uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, void* packet) { - const uint32_t slot_size_b = 0x40; +uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet) { + const uint32_t slot_size_b = CMD_SLOT_SIZE_B; // adevance command queue const uint64_t write_idx = hsa_queue_load_write_index_relaxed(queue); @@ -547,14 +560,15 @@ uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, void* packet) { } uint32_t slot_idx = (uint32_t)(write_idx % queue->size); - uint32_t* queue_slot = (uint32_t*)((uintptr_t)(queue->base_address) + (slot_idx * slot_size_b)); - uint32_t* slot_data = (uint32_t*)packet; + uint32_t* queue_slot = reinterpret_cast((uintptr_t)(queue->base_address) + (slot_idx * slot_size_b)); + const uint32_t* slot_data = reinterpret_cast(packet); // Copy buffered commands into the queue slot. // Overwrite the AQL invalid header (first dword) last. // This prevents the slot from being read until it's fully written. memcpy(&queue_slot[1], &slot_data[1], slot_size_b - sizeof(uint32_t)); - std::atomic* header_atomic_ptr = reinterpret_cast*>(&queue_slot[0]); + std::atomic* header_atomic_ptr = + reinterpret_cast*>(&queue_slot[0]); header_atomic_ptr->store(slot_data[0], std::memory_order_release); // ringdoor bell @@ -563,8 +577,25 @@ uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, void* packet) { return write_idx; } +uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet, size_t size_bytes) { + const uint32_t slot_size_b = CMD_SLOT_SIZE_B; + if ((size_bytes & (slot_size_b - 1)) != 0) { + fprintf(stderr, "HsaRsrcFactory::Submit: Bad packet size %zx\n", size_bytes); + abort(); + } + + const char* begin = reinterpret_cast(packet); + const char* end = begin + size_bytes; + uint64_t write_idx = 0; + for (const char* ptr = begin; ptr < end; ptr += slot_size_b) { + write_idx = Submit(queue, ptr); + } + + return write_idx; +} + HsaRsrcFactory* HsaRsrcFactory::instance_ = NULL; HsaRsrcFactory::mutex_t HsaRsrcFactory::mutex_; +HsaRsrcFactory::timestamp_t HsaRsrcFactory::timeout_ns_ = HsaTimer::TIMESTAMP_MAX; } // namespace util -} // namespace roctracer diff --git a/src/util/hsa_rsrc_factory.h b/src/util/hsa_rsrc_factory.h index 845da985f2..8d8b914dc7 100644 --- a/src/util/hsa_rsrc_factory.h +++ b/src/util/hsa_rsrc_factory.h @@ -1,27 +1,29 @@ -/* -Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. +/********************************************************************** +Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: +Redistribution and use in source and binary forms, with or without modification, are permitted +provided that the following conditions are met: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. +<95> Redistributions of source code must retain the above copyright notice, this list of +conditions and the following disclaimer. +<95> Redistributions in binary form must reproduce the above copyright notice, this list of +conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +********************************************************************/ -#ifndef _HSA_RSRC_FACTORY_H_ -#define _HSA_RSRC_FACTORY_H_ +#ifndef SRC_UTIL_HSA_RSRC_FACTORY_H_ +#define SRC_UTIL_HSA_RSRC_FACTORY_H_ #include #include @@ -43,26 +45,27 @@ THE SOFTWARE. #define HSA_QUEUE_ALIGN_BYTES 64 #define HSA_PACKET_ALIGN_BYTES 64 -#define CHECK_STATUS(msg, status) \ - if (status != HSA_STATUS_SUCCESS) { \ +#define CHECK_STATUS(msg, status) do { \ + if ((status) != HSA_STATUS_SUCCESS) { \ const char* emsg = 0; \ hsa_status_string(status, &emsg); \ printf("%s: %s\n", msg, emsg ? emsg : ""); \ - exit(1); \ - } + abort(); \ + } \ +} while (0) -#define CHECK_ITER_STATUS(msg, status) \ - if (status != HSA_STATUS_INFO_BREAK) { \ +#define CHECK_ITER_STATUS(msg, status) do { \ + if ((status) != HSA_STATUS_INFO_BREAK) { \ const char* emsg = 0; \ hsa_status_string(status, &emsg); \ printf("%s: %s\n", msg, emsg ? emsg : ""); \ - exit(1); \ - } + abort(); \ + } \ +} while (0) -namespace roctracer { namespace util { -static const unsigned MEM_PAGE_BYTES = 0x1000; -static const unsigned MEM_PAGE_MASK = MEM_PAGE_BYTES - 1; +static const size_t MEM_PAGE_BYTES = 0x1000; +static const size_t MEM_PAGE_MASK = MEM_PAGE_BYTES - 1; typedef decltype(hsa_agent_t::handle) hsa_agent_handle_t; // Encapsulates information about a Hsa Agent such as its @@ -94,13 +97,7 @@ struct AgentInfo { // Hsail profile supported by agent hsa_profile_t profile; -#if 0 - // Memory region supporting kernel parameters - hsa_region_t coarse_region; - // Memory region supporting kernel arguments - hsa_region_t kernarg_region; -#endif // CPU/GPU/kern-arg memory pools hsa_amd_memory_pool_t cpu_pool; hsa_amd_memory_pool_t gpu_pool; @@ -122,9 +119,47 @@ struct AgentInfo { uint32_t shader_arrays_per_se; }; +// HSA timer class +// Provides current HSA timestampa and system-clock/ns conversion API +class HsaTimer { + public: + typedef uint64_t timestamp_t; + static const timestamp_t TIMESTAMP_MAX = UINT64_MAX; + typedef long double freq_t; + + HsaTimer() { + timestamp_t sysclock_hz = 0; + hsa_status_t status = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz); + CHECK_STATUS("hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY)", status); + sysclock_factor_ = (freq_t)1000000000 / (freq_t)sysclock_hz; + } + + // Methods for system-clock/ns conversion + timestamp_t sysclock_to_ns(const timestamp_t& sysclock) const { + return timestamp_t((freq_t)sysclock * sysclock_factor_); + } + timestamp_t ns_to_sysclock(const timestamp_t& time) const { + return timestamp_t((freq_t)time / sysclock_factor_); + } + + // Return timestamp in 'ns' + timestamp_t timestamp_ns() const { + timestamp_t sysclock; + hsa_status_t status = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, &sysclock); + CHECK_STATUS("hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP)", status); + return sysclock_to_ns(sysclock); + } + + private: + // Timestamp frequency factor + freq_t sysclock_factor_; +}; + class HsaRsrcFactory { public: + static const size_t CMD_SLOT_SIZE_B = 0x40; typedef std::recursive_mutex mutex_t; + typedef HsaTimer::timestamp_t timestamp_t; static HsaRsrcFactory* Create(bool initialize_hsa = true) { std::lock_guard lck(mutex_); @@ -204,9 +239,24 @@ class HsaRsrcFactory { // @return uint8_t* Pointer to buffer, null if allocation fails. uint8_t* AllocateSysMemory(const AgentInfo* agent_info, size_t size); + // Allocate memory for command buffer. + // @param agent_info Agent from whose memory region to allocate + // @param size Size of memory in terms of bytes + // @return uint8_t* Pointer to buffer, null if allocation fails. + uint8_t* AllocateCmdMemory(const AgentInfo* agent_info, size_t size); + + // Wait signal + void SignalWait(const hsa_signal_t& signal) const; + + // Wait signal with signal value restore + void SignalWaitRestore(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const; + // Copy data from GPU to host memory - bool CopyToHost(const hsa_agent_t& agent, void* dst, const void* src, size_t size); - bool CopyToHost(const AgentInfo* agent_info, void* dst, const void* src, size_t size); + bool Memcpy(const hsa_agent_t& agent, void* dst, const void* src, size_t size); + bool Memcpy(const AgentInfo* agent_info, void* dst, const void* src, size_t size); + + // Memory free method + static bool FreeMemory(void* ptr); // Loads an Assembled Brig file and Finalizes it into Device Isa // @param agent_info Gpu device for which to finalize @@ -216,21 +266,35 @@ class HsaRsrcFactory { // be used to submit for execution // @return true if successful, false otherwise bool LoadAndFinalize(const AgentInfo* agent_info, const char* brig_path, const char* kernel_name, - hsa_executable_t* hsa_exec, hsa_executable_symbol_t* code_desc); + hsa_executable_t* hsa_exec, hsa_executable_symbol_t* code_desc); // Print the various fields of Hsa Gpu Agents bool PrintGpuAgents(const std::string& header); // Submit AQL packet to given queue - static uint64_t Submit(hsa_queue_t* queue, void* packet); + static uint64_t Submit(hsa_queue_t* queue, const void* packet); + static uint64_t Submit(hsa_queue_t* queue, const void* packet, size_t size_bytes); // Return AqlProfile API table - typedef hsa_ven_amd_aqlprofile_1_00_pfn_t aqlprofile_pfn_t; + typedef hsa_ven_amd_aqlprofile_pfn_t aqlprofile_pfn_t; const aqlprofile_pfn_t* AqlProfileApi() const { return &aqlprofile_api_; } // Return Loader API table const hsa_ven_amd_loader_1_00_pfn_t* LoaderApi() const { return &loader_api_; } + // Methods for system-clock/ns conversion and timestamp in 'ns' + timestamp_t SysclockToNs(const timestamp_t& sysclock) const { return timer_->sysclock_to_ns(sysclock); } + timestamp_t NsToSysclock(const timestamp_t& time) const { return timer_->ns_to_sysclock(time); } + timestamp_t TimestampNs() const { return timer_->timestamp_ns(); } + + timestamp_t GetSysTimeout() const { return timeout_; } + static timestamp_t GetTimeoutNs() { return timeout_ns_; } + static void SetTimeoutNs(const timestamp_t& time) { + std::lock_guard lck(mutex_); + timeout_ns_ = time; + if (instance_ != NULL) instance_->timeout_ = instance_->timer_->ns_to_sysclock(time); + } + private: // System agents iterating callback static hsa_status_t GetHsaAgentsCallback(hsa_agent_t agent, void* data); @@ -243,17 +307,20 @@ class HsaRsrcFactory { // Constructor of the class. Will initialize the Hsa Runtime and // query the system topology to get the list of Cpu and Gpu devices - HsaRsrcFactory(bool initialize_hsa); + explicit HsaRsrcFactory(bool initialize_hsa); // Destructor of the class ~HsaRsrcFactory(); - // HSA was initialized - const bool initialize_hsa_; - // Add an instance of AgentInfo representing a Hsa Gpu agent const AgentInfo* AddAgentInfo(const hsa_agent_t agent); + // To mmap command buffer memory + static const bool CMD_MEMORY_MMAP = false; + + // HSA was initialized + const bool initialize_hsa_; + static HsaRsrcFactory* instance_; static mutex_t mutex_; @@ -273,9 +340,20 @@ class HsaRsrcFactory { // Loader API table hsa_ven_amd_loader_1_00_pfn_t loader_api_; + + // System timeout, ns + static timestamp_t timeout_ns_; + // System timeout, sysclock + timestamp_t timeout_; + + // HSA timer + HsaTimer* timer_; + + // CPU/kern-arg memory pools + hsa_amd_memory_pool_t *cpu_pool_; + hsa_amd_memory_pool_t *kern_arg_pool_; }; } // namespace util -} // namespace roctracer -#endif // _HSA_RSRC_FACTORY_H_ +#endif // SRC_UTIL_HSA_RSRC_FACTORY_H_ diff --git a/test/tool/tracer_tool.cpp b/test/tool/tracer_tool.cpp index c0ad578c00..d7d658451a 100644 --- a/test/tool/tracer_tool.cpp +++ b/test/tool/tracer_tool.cpp @@ -306,6 +306,7 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, } else { ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HSA_API, hsa_api_callback, NULL)); } + ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HSA_API)); printf(")\n"); }