From 69a480a971e2fc37b36077084fc98686e8cfc348 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Wed, 13 Mar 2019 15:54:47 -0500 Subject: [PATCH] instance creation fix --- src/core/roctracer.cpp | 4 +- src/proxy/tracker.h | 16 +++- src/util/hsa_rsrc_factory.cpp | 173 +++++++++++++++++++++++++--------- src/util/hsa_rsrc_factory.h | 66 +++++++++++-- src/util/logger.h | 35 ++++--- test/CMakeLists.txt | 2 +- 6 files changed, 223 insertions(+), 73 deletions(-) diff --git a/src/core/roctracer.cpp b/src/core/roctracer.cpp index acb94c0cfd..b2f4fbcc59 100644 --- a/src/core/roctracer.cpp +++ b/src/core/roctracer.cpp @@ -435,7 +435,7 @@ hsa_status_t hsa_amd_memory_async_copy_rect_interceptor( } util::Logger::mutex_t util::Logger::mutex_; -util::Logger* util::Logger::instance_ = NULL; +std::atomic util::Logger::instance_{}; MemoryPool* memory_pool = NULL; typedef std::recursive_mutex memory_pool_mutex_t; memory_pool_mutex_t memory_pool_mutex; @@ -445,7 +445,7 @@ std::atomic HipLoader::instance_{}; std::atomic HccLoader::instance_{}; } -proxy::Tracker* proxy::Tracker::instance_ = NULL; +std::atomic proxy::Tracker::instance_{}; proxy::Tracker::mutex_t proxy::Tracker::glob_mutex_; proxy::Tracker::counter_t proxy::Tracker::counter_ = 0; diff --git a/src/proxy/tracker.h b/src/proxy/tracker.h index 69c71551cb..190d6caf60 100644 --- a/src/proxy/tracker.h +++ b/src/proxy/tracker.h @@ -71,13 +71,19 @@ class Tracker { static Tracker* Create() { std::lock_guard lck(glob_mutex_); - if (instance_ == NULL) instance_ = new Tracker; - return instance_; + Tracker* obj = instance_.load(std::memory_order_relaxed); + if (obj == NULL) { + obj = new Tracker; + if (obj == NULL) EXC_ABORT(HSA_STATUS_ERROR, "Tracker creation failed"); + instance_.store(obj, std::memory_order_release); + } + return obj; } static Tracker& Instance() { - if (instance_ == NULL) instance_ = Create(); - return *instance_; + Tracker* obj = instance_.load(std::memory_order_acquire); + if (obj == NULL) obj = Create(); + return *obj; } static void Destroy() { @@ -257,7 +263,7 @@ class Tracker { } // instance - static Tracker* instance_; + static std::atomic instance_; static mutex_t glob_mutex_; static counter_t counter_; diff --git a/src/util/hsa_rsrc_factory.cpp b/src/util/hsa_rsrc_factory.cpp index 1d0f6c6d4f..97e599b3d8 100644 --- a/src/util/hsa_rsrc_factory.cpp +++ b/src/util/hsa_rsrc_factory.cpp @@ -76,13 +76,13 @@ static hsa_status_t FindGlobalPool(hsa_amd_memory_pool_t pool, void* data, bool return HSA_STATUS_ERROR_INVALID_ARGUMENT; } - err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment); + err = HsaRsrcFactory::HsaApi()->hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment); CHECK_STATUS("hsa_amd_memory_pool_get_info", err); if (HSA_AMD_SEGMENT_GLOBAL != segment) { return HSA_STATUS_SUCCESS; } - err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag); + err = HsaRsrcFactory::HsaApi()->hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag); CHECK_STATUS("hsa_amd_memory_pool_get_info", err); uint32_t karg_st = flag & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT; @@ -116,14 +116,16 @@ HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize cpu_pool_ = NULL; kern_arg_pool_ = NULL; + InitHsaApiTable(NULL); + // Initialize the Hsa Runtime if (initialize_hsa_) { - status = hsa_init(); + status = hsa_api_.hsa_init(); CHECK_STATUS("Error in hsa_init", status); } // Discover the set of Gpu devices available on the platform - status = hsa_iterate_agents(GetHsaAgentsCallback, this); + status = hsa_api_.hsa_iterate_agents(GetHsaAgentsCallback, this); CHECK_STATUS("Error Calling hsa_iterate_agents", status); if (cpu_pool_ == NULL) CHECK_STATUS("CPU memory pool is not found", HSA_STATUS_ERROR); if (kern_arg_pool_ == NULL) CHECK_STATUS("Kern-arg memory pool is not found", HSA_STATUS_ERROR); @@ -133,13 +135,13 @@ HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize #ifdef ROCP_LD_AQLPROFILE status = LoadAqlProfileLib(&aqlprofile_api_); #else - status = hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, hsa_ven_amd_aqlprofile_VERSION_MAJOR, sizeof(aqlprofile_api_), &aqlprofile_api_); + status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, hsa_ven_amd_aqlprofile_VERSION_MAJOR, sizeof(aqlprofile_api_), &aqlprofile_api_); #endif CHECK_STATUS("aqlprofile API table load failed", status); // Get Loader API table loader_api_ = {0}; - status = hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_LOADER, 1, sizeof(loader_api_), &loader_api_); + status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_LOADER, 1, sizeof(loader_api_), &loader_api_); CHECK_STATUS("loader API table query failed", status); // Instantiate HSA timer @@ -157,11 +159,93 @@ HsaRsrcFactory::~HsaRsrcFactory() { for (auto p : cpu_list_) delete p; for (auto p : gpu_list_) delete p; if (initialize_hsa_) { - hsa_status_t status = hsa_shut_down(); + hsa_status_t status = hsa_api_.hsa_shut_down(); CHECK_STATUS("Error in hsa_shut_down", status); } } +void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) { + std::lock_guard lck(mutex_); + + if (hsa_api_.hsa_init == NULL) { + if (table != NULL) { + hsa_api_.hsa_init = table->core_->hsa_init_fn; + hsa_api_.hsa_shut_down = table->core_->hsa_shut_down_fn; + hsa_api_.hsa_agent_get_info = table->core_->hsa_agent_get_info_fn; + + hsa_api_.hsa_iterate_agents = table->core_->hsa_iterate_agents_fn; + + hsa_api_.hsa_queue_create = table->core_->hsa_queue_create_fn; + hsa_api_.hsa_queue_destroy = table->core_->hsa_queue_destroy_fn; + hsa_api_.hsa_queue_load_write_index_relaxed = table->core_->hsa_queue_load_write_index_relaxed_fn; + hsa_api_.hsa_queue_store_write_index_relaxed = table->core_->hsa_queue_store_write_index_relaxed_fn; + hsa_api_.hsa_queue_load_read_index_relaxed = table->core_->hsa_queue_load_read_index_relaxed_fn; + hsa_api_.hsa_signal_create = table->core_->hsa_signal_create_fn; + hsa_api_.hsa_signal_destroy = table->core_->hsa_signal_destroy_fn; + hsa_api_.hsa_signal_load_relaxed = table->core_->hsa_signal_load_relaxed_fn; + hsa_api_.hsa_signal_store_relaxed = table->core_->hsa_signal_store_relaxed_fn; + hsa_api_.hsa_signal_store_screlease = table->core_->hsa_signal_store_screlease_fn; + hsa_api_.hsa_signal_wait_scacquire = table->core_->hsa_signal_wait_scacquire_fn; + + hsa_api_.hsa_system_get_major_extension_table = table->core_->hsa_system_get_major_extension_table_fn; + + hsa_api_.hsa_code_object_reader_create_from_file = table->core_->hsa_code_object_reader_create_from_file_fn; + hsa_api_.hsa_executable_create_alt = table->core_->hsa_executable_create_alt_fn; + hsa_api_.hsa_executable_load_agent_code_object = table->core_->hsa_executable_load_agent_code_object_fn; + hsa_api_.hsa_executable_freeze = table->core_->hsa_executable_freeze_fn; + hsa_api_.hsa_executable_get_symbol = table->core_->hsa_executable_get_symbol_fn; + + hsa_api_.hsa_amd_agent_iterate_memory_pools = table->amd_ext_->hsa_amd_agent_iterate_memory_pools_fn; + hsa_api_.hsa_amd_memory_pool_get_info = table->amd_ext_->hsa_amd_memory_pool_get_info_fn; + hsa_api_.hsa_amd_memory_pool_allocate = table->amd_ext_->hsa_amd_memory_pool_allocate_fn; + hsa_api_.hsa_amd_agents_allow_access = table->amd_ext_->hsa_amd_agents_allow_access_fn; + + hsa_api_.hsa_amd_memory_async_copy = table->amd_ext_->hsa_amd_memory_async_copy_fn; + + hsa_api_.hsa_amd_signal_async_handler = table->amd_ext_->hsa_amd_signal_async_handler_fn; + hsa_api_.hsa_amd_profiling_get_async_copy_time = table->amd_ext_->hsa_amd_profiling_get_async_copy_time_fn; + hsa_api_.hsa_amd_profiling_get_dispatch_time = table->amd_ext_->hsa_amd_profiling_get_dispatch_time_fn; + } else { + hsa_api_.hsa_init = hsa_init; + hsa_api_.hsa_shut_down = hsa_shut_down; + hsa_api_.hsa_agent_get_info = hsa_agent_get_info; + + hsa_api_.hsa_iterate_agents = hsa_iterate_agents; + + hsa_api_.hsa_queue_create = hsa_queue_create; + hsa_api_.hsa_queue_destroy = hsa_queue_destroy; + hsa_api_.hsa_queue_load_write_index_relaxed = hsa_queue_load_write_index_relaxed; + hsa_api_.hsa_queue_store_write_index_relaxed = hsa_queue_store_write_index_relaxed; + hsa_api_.hsa_queue_load_read_index_relaxed = hsa_queue_load_read_index_relaxed; + hsa_api_.hsa_signal_create = hsa_signal_create; + hsa_api_.hsa_signal_destroy = hsa_signal_destroy; + hsa_api_.hsa_signal_store_relaxed = hsa_signal_store_relaxed; + hsa_api_.hsa_signal_wait_scacquire = hsa_signal_wait_scacquire; + + hsa_api_.hsa_amd_agent_iterate_memory_pools = hsa_amd_agent_iterate_memory_pools; + hsa_api_.hsa_amd_memory_pool_get_info = hsa_amd_memory_pool_get_info; + hsa_api_.hsa_amd_memory_pool_allocate = hsa_amd_memory_pool_allocate; + hsa_api_.hsa_amd_agents_allow_access = hsa_amd_agents_allow_access; + + hsa_api_.hsa_amd_memory_async_copy = hsa_amd_memory_async_copy; + + hsa_api_.hsa_system_get_major_extension_table = hsa_system_get_major_extension_table; + + hsa_api_.hsa_code_object_reader_create_from_file = hsa_code_object_reader_create_from_file; + hsa_api_.hsa_executable_create_alt = hsa_executable_create_alt; + hsa_api_.hsa_executable_load_agent_code_object = hsa_executable_load_agent_code_object; + hsa_api_.hsa_executable_freeze = hsa_executable_freeze; + hsa_api_.hsa_executable_get_symbol = hsa_executable_get_symbol; + + hsa_api_.hsa_amd_signal_async_handler = hsa_amd_signal_async_handler; + hsa_api_.hsa_amd_profiling_get_async_copy_time = hsa_amd_profiling_get_async_copy_time; + hsa_api_.hsa_amd_profiling_get_dispatch_time = hsa_amd_profiling_get_dispatch_time; + hsa_api_.hsa_signal_load_relaxed = hsa_signal_load_relaxed; + hsa_api_.hsa_signal_store_screlease = hsa_signal_store_screlease; + } + } +} + hsa_status_t HsaRsrcFactory::LoadAqlProfileLib(aqlprofile_pfn_t* api) { void* handle = dlopen(kAqlProfileLib, RTLD_NOW); if (handle == NULL) { @@ -203,7 +287,7 @@ const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) { AgentInfo* agent_info = NULL; hsa_device_type_t type; - status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type); + status = hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type); CHECK_STATUS("Error Calling hsa_agent_get_info", status); if (type == HSA_DEVICE_TYPE_CPU) { @@ -212,9 +296,9 @@ const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) { agent_info->dev_type = HSA_DEVICE_TYPE_CPU; agent_info->dev_index = cpu_list_.size(); - status = hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->cpu_pool); + status = hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->cpu_pool); if ((status == HSA_STATUS_INFO_BREAK) && (cpu_pool_ == NULL)) cpu_pool_ = &agent_info->cpu_pool; - status = hsa_amd_agent_iterate_memory_pools(agent, FindKernArgPool, &agent_info->kern_arg_pool); + status = hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindKernArgPool, &agent_info->kern_arg_pool); if ((status == HSA_STATUS_INFO_BREAK) && (kern_arg_pool_ == NULL)) kern_arg_pool_ = &agent_info->kern_arg_pool; agent_info->gpu_pool = {}; @@ -226,28 +310,28 @@ const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) { agent_info = new AgentInfo{}; agent_info->dev_id = agent; agent_info->dev_type = HSA_DEVICE_TYPE_GPU; - hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_info->name); + hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_info->name); strncpy(agent_info->gfxip, agent_info->name, 4); agent_info->gfxip[4] = '\0'; - hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &agent_info->max_wave_size); - hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &agent_info->max_queue_size); - hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_info->profile); + hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &agent_info->max_wave_size); + hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &agent_info->max_queue_size); + hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_info->profile); agent_info->is_apu = (agent_info->profile == HSA_PROFILE_FULL) ? true : false; - hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT), + hsa_api_.hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT), &agent_info->cu_num); - hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU), + hsa_api_.hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU), &agent_info->waves_per_cu); - hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU), + hsa_api_.hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU), &agent_info->simds_per_cu); - hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES), + hsa_api_.hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES), &agent_info->se_num); - hsa_agent_get_info(agent, + hsa_api_.hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE), &agent_info->shader_arrays_per_se); agent_info->cpu_pool = {}; agent_info->kern_arg_pool = {}; - status = hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->gpu_pool); + status = hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->gpu_pool); CHECK_ITER_STATUS("hsa_amd_agent_iterate_memory_pools(gpu pool)", status); // Set GPU index @@ -338,7 +422,7 @@ bool HsaRsrcFactory::GetCpuAgentInfo(uint32_t idx, const AgentInfo** agent_info) bool HsaRsrcFactory::CreateQueue(const AgentInfo* agent_info, uint32_t num_pkts, hsa_queue_t** queue) { hsa_status_t status; - status = hsa_queue_create(agent_info->dev_id, num_pkts, HSA_QUEUE_TYPE_MULTI, NULL, NULL, + status = hsa_api_.hsa_queue_create(agent_info->dev_id, num_pkts, HSA_QUEUE_TYPE_MULTI, NULL, NULL, UINT32_MAX, UINT32_MAX, queue); return (status == HSA_STATUS_SUCCESS); } @@ -349,7 +433,7 @@ bool HsaRsrcFactory::CreateQueue(const AgentInfo* agent_info, uint32_t num_pkts, // @return bool true if successful, false otherwise bool HsaRsrcFactory::CreateSignal(uint32_t value, hsa_signal_t* signal) { hsa_status_t status; - status = hsa_signal_create(value, 0, NULL, signal); + status = hsa_api_.hsa_signal_create(value, 0, NULL, signal); return (status == HSA_STATUS_SUCCESS); } @@ -362,7 +446,7 @@ uint8_t* HsaRsrcFactory::AllocateLocalMemory(const AgentInfo* agent_info, size_t hsa_status_t status = HSA_STATUS_ERROR; uint8_t* buffer = NULL; size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; - status = hsa_amd_memory_pool_allocate(agent_info->gpu_pool, size, 0, reinterpret_cast(&buffer)); + status = hsa_api_.hsa_amd_memory_pool_allocate(agent_info->gpu_pool, size, 0, reinterpret_cast(&buffer)); uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL; return ptr; } @@ -377,11 +461,11 @@ uint8_t* HsaRsrcFactory::AllocateKernArgMemory(const AgentInfo* agent_info, size uint8_t* buffer = NULL; if (!cpu_agents_.empty()) { size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; - status = hsa_amd_memory_pool_allocate(*kern_arg_pool_, size, 0, reinterpret_cast(&buffer)); + status = hsa_api_.hsa_amd_memory_pool_allocate(*kern_arg_pool_, size, 0, reinterpret_cast(&buffer)); // Both the CPU and GPU can access the kernel arguments if (status == HSA_STATUS_SUCCESS) { hsa_agent_t ag_list[1] = {agent_info->dev_id}; - status = hsa_amd_agents_allow_access(1, ag_list, NULL, buffer); + status = hsa_api_.hsa_amd_agents_allow_access(1, ag_list, NULL, buffer); } } uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL; @@ -397,11 +481,11 @@ uint8_t* HsaRsrcFactory::AllocateSysMemory(const AgentInfo* agent_info, size_t s uint8_t* buffer = NULL; size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; if (!cpu_agents_.empty()) { - status = hsa_amd_memory_pool_allocate(*cpu_pool_, size, 0, reinterpret_cast(&buffer)); + status = hsa_api_.hsa_amd_memory_pool_allocate(*cpu_pool_, size, 0, reinterpret_cast(&buffer)); // Both the CPU and GPU can access the memory if (status == HSA_STATUS_SUCCESS) { hsa_agent_t ag_list[1] = {agent_info->dev_id}; - status = hsa_amd_agents_allow_access(1, ag_list, NULL, buffer); + status = hsa_api_.hsa_amd_agents_allow_access(1, ag_list, NULL, buffer); } } uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL; @@ -425,12 +509,12 @@ uint8_t* HsaRsrcFactory::AllocateCmdMemory(const AgentInfo* agent_info, size_t s void HsaRsrcFactory::SignalWait(const hsa_signal_t& signal) const { while (1) { const hsa_signal_value_t signal_value = - hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, 1, timeout_, HSA_WAIT_STATE_BLOCKED); + hsa_api_.hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, 1, timeout_, HSA_WAIT_STATE_BLOCKED); if (signal_value == 0) { break; } else { - if (signal_value == 1) { WARN_LOGGING("signal waiting..."); } - else { EXC_RAISING(HSA_STATUS_ERROR, "hsa_signal_wait_scacquire (" << signal_value << ")"); } + if (signal_value == 1) WARN_LOGGING("signal waiting..."); + else EXC_RAISING(HSA_STATUS_ERROR, "hsa_signal_wait_scacquire (" << signal_value << ")"); } } } @@ -438,7 +522,7 @@ void HsaRsrcFactory::SignalWait(const hsa_signal_t& signal) const { // Wait signal with signal value restore void HsaRsrcFactory::SignalWaitRestore(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const { SignalWait(signal); - hsa_signal_store_relaxed(const_cast(signal), signal_value); + hsa_api_.hsa_signal_store_relaxed(const_cast(signal), signal_value); } // Copy data from GPU to host memory @@ -446,12 +530,12 @@ bool HsaRsrcFactory::Memcpy(const hsa_agent_t& agent, void* dst, const void* src hsa_status_t status = HSA_STATUS_ERROR; if (!cpu_agents_.empty()) { hsa_signal_t s = {}; - status = hsa_signal_create(1, 0, NULL, &s); + status = hsa_api_.hsa_signal_create(1, 0, NULL, &s); CHECK_STATUS("hsa_signal_create()", status); - status = hsa_amd_memory_async_copy(dst, cpu_agents_[0], src, agent, size, 0, NULL, s); + status = hsa_api_.hsa_amd_memory_async_copy(dst, cpu_agents_[0], src, agent, size, 0, NULL, s); CHECK_STATUS("hsa_amd_memory_async_copy()", status); SignalWait(s); - status = hsa_signal_destroy(s); + status = hsa_api_.hsa_signal_destroy(s); CHECK_STATUS("hsa_signal_destroy()", status); } return (status == HSA_STATUS_SUCCESS); @@ -493,29 +577,29 @@ bool HsaRsrcFactory::LoadAndFinalize(const AgentInfo* agent_info, const char* br // Create code object reader hsa_code_object_reader_t code_obj_rdr = {0}; - status = hsa_code_object_reader_create_from_file(file_handle, &code_obj_rdr); + status = hsa_api_.hsa_code_object_reader_create_from_file(file_handle, &code_obj_rdr); if (status != HSA_STATUS_SUCCESS) { std::cerr << "Failed to create code object reader '" << filename << "'" << std::endl; return false; } // Create executable. - status = hsa_executable_create_alt(HSA_PROFILE_FULL, HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, + status = hsa_api_.hsa_executable_create_alt(HSA_PROFILE_FULL, HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, NULL, executable); CHECK_STATUS("Error in creating executable object", status); // Load code object. - status = hsa_executable_load_agent_code_object(*executable, agent_info->dev_id, code_obj_rdr, + status = hsa_api_.hsa_executable_load_agent_code_object(*executable, agent_info->dev_id, code_obj_rdr, NULL, NULL); CHECK_STATUS("Error in loading executable object", status); // Freeze executable. - status = hsa_executable_freeze(*executable, ""); + status = hsa_api_.hsa_executable_freeze(*executable, ""); CHECK_STATUS("Error in freezing executable object", status); // Get symbol handle. hsa_executable_symbol_t kernelSymbol; - status = hsa_executable_get_symbol(*executable, NULL, kernel_name, agent_info->dev_id, 0, + status = hsa_api_.hsa_executable_get_symbol(*executable, NULL, kernel_name, agent_info->dev_id, 0, &kernelSymbol); CHECK_STATUS("Error in looking up kernel symbol", status); @@ -553,9 +637,9 @@ uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet) { const uint32_t slot_size_b = CMD_SLOT_SIZE_B; // adevance command queue - const uint64_t write_idx = hsa_queue_load_write_index_relaxed(queue); - hsa_queue_store_write_index_relaxed(queue, write_idx + 1); - while ((write_idx - hsa_queue_load_read_index_relaxed(queue)) >= queue->size) { + const uint64_t write_idx = hsa_api_.hsa_queue_load_write_index_relaxed(queue); + hsa_api_.hsa_queue_store_write_index_relaxed(queue, write_idx + 1); + while ((write_idx - hsa_api_.hsa_queue_load_read_index_relaxed(queue)) >= queue->size) { sched_yield(); } @@ -572,7 +656,7 @@ uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet) { header_atomic_ptr->store(slot_data[0], std::memory_order_release); // ringdoor bell - hsa_signal_store_relaxed(queue->doorbell_signal, write_idx); + hsa_api_.hsa_signal_store_relaxed(queue->doorbell_signal, write_idx); return write_idx; } @@ -594,8 +678,9 @@ uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet, size_t s return write_idx; } -HsaRsrcFactory* HsaRsrcFactory::instance_ = NULL; +std::atomic HsaRsrcFactory::instance_{}; HsaRsrcFactory::mutex_t HsaRsrcFactory::mutex_; HsaRsrcFactory::timestamp_t HsaRsrcFactory::timeout_ns_ = HsaTimer::TIMESTAMP_MAX; +hsa_pfn_t HsaRsrcFactory::hsa_api_{}; } // namespace util diff --git a/src/util/hsa_rsrc_factory.h b/src/util/hsa_rsrc_factory.h index 8d8b914dc7..d99240d34e 100644 --- a/src/util/hsa_rsrc_factory.h +++ b/src/util/hsa_rsrc_factory.h @@ -26,6 +26,7 @@ POSSIBILITY OF SUCH DAMAGE. #define SRC_UTIL_HSA_RSRC_FACTORY_H_ #include +#include #include #include #include @@ -35,6 +36,7 @@ POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include #include #include @@ -68,6 +70,44 @@ static const size_t MEM_PAGE_BYTES = 0x1000; static const size_t MEM_PAGE_MASK = MEM_PAGE_BYTES - 1; typedef decltype(hsa_agent_t::handle) hsa_agent_handle_t; +struct hsa_pfn_t { + decltype(hsa_init)* hsa_init; + decltype(hsa_shut_down)* hsa_shut_down; + decltype(hsa_agent_get_info)* hsa_agent_get_info; + + decltype(hsa_iterate_agents)* hsa_iterate_agents; + + decltype(hsa_queue_create)* hsa_queue_create; + decltype(hsa_queue_destroy)* hsa_queue_destroy; + decltype(hsa_queue_load_write_index_relaxed)* hsa_queue_load_write_index_relaxed; + decltype(hsa_queue_store_write_index_relaxed)* hsa_queue_store_write_index_relaxed; + decltype(hsa_queue_load_read_index_relaxed)* hsa_queue_load_read_index_relaxed; + decltype(hsa_signal_create)* hsa_signal_create; + decltype(hsa_signal_destroy)* hsa_signal_destroy; + decltype(hsa_signal_store_relaxed)* hsa_signal_store_relaxed; + decltype(hsa_signal_wait_scacquire)* hsa_signal_wait_scacquire; + + decltype(hsa_amd_agent_iterate_memory_pools)* hsa_amd_agent_iterate_memory_pools; + decltype(hsa_amd_memory_pool_get_info)* hsa_amd_memory_pool_get_info; + decltype(hsa_amd_memory_pool_allocate)* hsa_amd_memory_pool_allocate; + decltype(hsa_amd_agents_allow_access)* hsa_amd_agents_allow_access; + decltype(hsa_amd_memory_async_copy)* hsa_amd_memory_async_copy; + + decltype(hsa_system_get_major_extension_table)* hsa_system_get_major_extension_table; + + decltype(hsa_code_object_reader_create_from_file)* hsa_code_object_reader_create_from_file; + decltype(hsa_executable_create_alt)* hsa_executable_create_alt; + decltype(hsa_executable_load_agent_code_object)* hsa_executable_load_agent_code_object; + decltype(hsa_executable_freeze)* hsa_executable_freeze; + decltype(hsa_executable_get_symbol)* hsa_executable_get_symbol; + + decltype(hsa_amd_signal_async_handler)* hsa_amd_signal_async_handler; + decltype(hsa_amd_profiling_get_async_copy_time)* hsa_amd_profiling_get_async_copy_time; + decltype(hsa_amd_profiling_get_dispatch_time)* hsa_amd_profiling_get_dispatch_time; + decltype(hsa_signal_load_relaxed)* hsa_signal_load_relaxed; + decltype(hsa_signal_store_screlease)* hsa_signal_store_screlease; +}; + // Encapsulates information about a Hsa Agent such as its // handle, name, max queue size, max wavefront size, etc. struct AgentInfo { @@ -163,17 +203,20 @@ class HsaRsrcFactory { static HsaRsrcFactory* Create(bool initialize_hsa = true) { std::lock_guard lck(mutex_); - if (instance_ == NULL) { - instance_ = new HsaRsrcFactory(initialize_hsa); + HsaRsrcFactory* obj = instance_.load(std::memory_order_relaxed); + if (obj == NULL) { + obj = new HsaRsrcFactory(initialize_hsa); + instance_.store(obj, std::memory_order_release); } - return instance_; + return obj; } static HsaRsrcFactory& Instance() { - if (instance_ == NULL) instance_ = Create(false); - hsa_status_t status = (instance_ != NULL) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR; + HsaRsrcFactory* obj = instance_.load(std::memory_order_acquire); + if (obj == NULL) obj = Create(false); + hsa_status_t status = (obj != NULL) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR; CHECK_STATUS("HsaRsrcFactory::Instance() failed", status); - return *instance_; + return *obj; } static void Destroy() { @@ -275,6 +318,10 @@ class HsaRsrcFactory { static uint64_t Submit(hsa_queue_t* queue, const void* packet); static uint64_t Submit(hsa_queue_t* queue, const void* packet, size_t size_bytes); + // Initialize HSA API table + void static InitHsaApiTable(HsaApiTable* table); + static const hsa_pfn_t* HsaApi() { return &hsa_api_; } + // Return AqlProfile API table typedef hsa_ven_amd_aqlprofile_pfn_t aqlprofile_pfn_t; const aqlprofile_pfn_t* AqlProfileApi() const { return &aqlprofile_api_; } @@ -292,7 +339,7 @@ class HsaRsrcFactory { static void SetTimeoutNs(const timestamp_t& time) { std::lock_guard lck(mutex_); timeout_ns_ = time; - if (instance_ != NULL) instance_->timeout_ = instance_->timer_->ns_to_sysclock(time); + if (instance_ != NULL) Instance().timeout_ = Instance().timer_->ns_to_sysclock(time); } private: @@ -321,7 +368,7 @@ class HsaRsrcFactory { // HSA was initialized const bool initialize_hsa_; - static HsaRsrcFactory* instance_; + static std::atomic instance_; static mutex_t mutex_; // Used to maintain a list of Hsa Gpu Agent Info @@ -335,6 +382,9 @@ class HsaRsrcFactory { // System agents map std::map agent_map_; + // HSA runtime API table + static hsa_pfn_t hsa_api_; + // AqlProfile API table aqlprofile_pfn_t aqlprofile_api_; diff --git a/src/util/logger.h b/src/util/logger.h index d0b9640737..5ed90c4a50 100644 --- a/src/util/logger.h +++ b/src/util/logger.h @@ -75,8 +75,16 @@ class Logger { static Logger* Create() { std::lock_guard lck(mutex_); - if (instance_ == NULL) instance_ = new Logger(); - return instance_; + Logger* obj = instance_.load(std::memory_order_relaxed); + if (obj == NULL) { + obj = new Logger(); + if (obj == NULL) { + std::cerr << "ROCTracer: log object creation failed" << std::endl << std::flush; + abort(); + } + instance_.store(obj, std::memory_order_release); + } + return obj; } static void Destroy() { @@ -86,8 +94,9 @@ class Logger { } static Logger& Instance() { - Create(); - return *instance_; + Logger* obj = instance_.load(std::memory_order_acquire); + if (obj == NULL) obj = Create(); + return *obj; } private: @@ -152,7 +161,7 @@ class Logger { bool messaging_; static mutex_t mutex_; - static Logger* instance_; + static std::atomic instance_; std::map message_; }; @@ -160,32 +169,32 @@ class Logger { } // namespace roctracer #define ERR_LOGGING(stream) \ - { \ + do { \ roctracer::util::Logger::Instance() << "error: " << roctracer::util::Logger::begm \ << stream << roctracer::util::Logger::endl; \ - } + } while(0) #define INFO_LOGGING(stream) \ - { \ + do { \ roctracer::util::Logger::Instance() << "info: " << roctracer::util::Logger::begm << stream \ << roctracer::util::Logger::endl; \ - } + } while(0) #define WARN_LOGGING(stream) \ - { \ + do { \ std::cerr << "ROCProfiler: " << stream << std::endl; \ roctracer::util::Logger::Instance() << "warning: " << roctracer::util::Logger::begm << stream \ << roctracer::util::Logger::endl; \ - } + } while(0) #ifdef DEBUG #define DBG_LOGGING(stream) \ - { \ + do { \ roctracer::util::Logger::Instance() << roctracer::util::Logger::begm << "debug: \"" \ << stream << "\"" < < < < \ " in " << __FUNCTION__ << " at " << __FILE__ << " line " << __LINE__ \ << roctracer::util::Logger::endl; \ - } + } while(0) #endif #endif // SRC_UTIL_LOGGER_H_ diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 709e4c12b2..c00db39ceb 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -49,7 +49,7 @@ target_link_libraries ( ${TEST_LIB} ${ROCTRACER_TARGET} ${HSA_RUNTIME_LIB} c std ## Build HSA test execute_process ( COMMAND sh -xc "if [ ! -e ${TEST_DIR}/hsa ] ; then git clone https://github.com/ROCmSoftwarePlatform/hsa-class.git ${TEST_DIR}/hsa; fi" ) -execute_process ( COMMAND sh -xc "if [ -e ${TEST_DIR}/hsa ] ; then cd ${TEST_DIR}/hsa && git checkout e771707; fi" ) +execute_process ( COMMAND sh -xc "if [ -e ${TEST_DIR}/hsa ] ; then cd ${TEST_DIR}/hsa && git checkout 5b0d1af; fi" ) set ( TEST_DIR ${HSA_TEST_DIR} ) add_subdirectory ( ${TEST_DIR} ${PROJECT_BINARY_DIR}/test/hsa )