Этот коммит содержится в:
Evgeny
2019-03-13 15:54:47 -05:00
родитель d2216d0f9c
Коммит 69a480a971
6 изменённых файлов: 223 добавлений и 73 удалений
+2 -2
Просмотреть файл
@@ -435,7 +435,7 @@ hsa_status_t hsa_amd_memory_async_copy_rect_interceptor(
}
// Storage for the Logger singleton's static members.
// instance_ is defined exactly once, as the atomic pointer that
// Logger::Create()/Instance() access through load()/store(); the empty
// brace initializer leaves it null until the first Create() call.
util::Logger::mutex_t util::Logger::mutex_;
std::atomic<util::Logger*> util::Logger::instance_{};
MemoryPool* memory_pool = NULL;
typedef std::recursive_mutex memory_pool_mutex_t;
memory_pool_mutex_t memory_pool_mutex;
@@ -445,7 +445,7 @@ std::atomic<HipLoader*> HipLoader::instance_{};
std::atomic<HccLoader*> HccLoader::instance_{};
}
// Storage for the Tracker singleton's static members.
// instance_ is defined exactly once, as the atomic pointer used by
// Tracker::Create()/Instance(); it starts out null.
std::atomic<proxy::Tracker*> proxy::Tracker::instance_{};
proxy::Tracker::mutex_t proxy::Tracker::glob_mutex_;
proxy::Tracker::counter_t proxy::Tracker::counter_ = 0;
+11 -5
Просмотреть файл
@@ -71,13 +71,19 @@ class Tracker {
// Returns the Tracker singleton, constructing it on first use.
// glob_mutex_ is held for the whole call, so at most one thread runs the
// construction branch at a time.
static Tracker* Create() {
  std::lock_guard<mutex_t> lck(glob_mutex_);
  // The store below is made while holding glob_mutex_, so a relaxed load is
  // sufficient to observe it here.
  Tracker* obj = instance_.load(std::memory_order_relaxed);
  if (obj == NULL) {
    obj = new Tracker;
    // NOTE(review): a plain new-expression normally throws instead of
    // returning null; this check only matters for a non-throwing allocator.
    if (obj == NULL) EXC_ABORT(HSA_STATUS_ERROR, "Tracker creation failed");
    // Publish the fully constructed object to lock-free readers.
    instance_.store(obj, std::memory_order_release);
  }
  return obj;
}
// Returns a reference to the Tracker singleton.
// The common case is a single acquire load with no locking; Create() is only
// called (and the mutex only taken) when no instance has been published yet.
static Tracker& Instance() {
  Tracker* obj = instance_.load(std::memory_order_acquire);
  if (obj == NULL) obj = Create();
  return *obj;
}
static void Destroy() {
@@ -257,7 +263,7 @@ class Tracker {
}
// instance
// Singleton pointer; atomic so Instance() can read it without taking glob_mutex_.
static std::atomic<Tracker*> instance_;
// Mutex taken by Create() while the singleton is constructed.
static mutex_t glob_mutex_;
static counter_t counter_;
+129 -44
Просмотреть файл
@@ -76,13 +76,13 @@ static hsa_status_t FindGlobalPool(hsa_amd_memory_pool_t pool, void* data, bool
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment);
err = HsaRsrcFactory::HsaApi()->hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment);
CHECK_STATUS("hsa_amd_memory_pool_get_info", err);
if (HSA_AMD_SEGMENT_GLOBAL != segment) {
return HSA_STATUS_SUCCESS;
}
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag);
err = HsaRsrcFactory::HsaApi()->hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag);
CHECK_STATUS("hsa_amd_memory_pool_get_info", err);
uint32_t karg_st = flag & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT;
@@ -116,14 +116,16 @@ HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize
cpu_pool_ = NULL;
kern_arg_pool_ = NULL;
InitHsaApiTable(NULL);
// Initialize the Hsa Runtime
if (initialize_hsa_) {
status = hsa_init();
status = hsa_api_.hsa_init();
CHECK_STATUS("Error in hsa_init", status);
}
// Discover the set of Gpu devices available on the platform
status = hsa_iterate_agents(GetHsaAgentsCallback, this);
status = hsa_api_.hsa_iterate_agents(GetHsaAgentsCallback, this);
CHECK_STATUS("Error Calling hsa_iterate_agents", status);
if (cpu_pool_ == NULL) CHECK_STATUS("CPU memory pool is not found", HSA_STATUS_ERROR);
if (kern_arg_pool_ == NULL) CHECK_STATUS("Kern-arg memory pool is not found", HSA_STATUS_ERROR);
@@ -133,13 +135,13 @@ HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize
#ifdef ROCP_LD_AQLPROFILE
status = LoadAqlProfileLib(&aqlprofile_api_);
#else
status = hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, hsa_ven_amd_aqlprofile_VERSION_MAJOR, sizeof(aqlprofile_api_), &aqlprofile_api_);
status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, hsa_ven_amd_aqlprofile_VERSION_MAJOR, sizeof(aqlprofile_api_), &aqlprofile_api_);
#endif
CHECK_STATUS("aqlprofile API table load failed", status);
// Get Loader API table
loader_api_ = {0};
status = hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_LOADER, 1, sizeof(loader_api_), &loader_api_);
status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_LOADER, 1, sizeof(loader_api_), &loader_api_);
CHECK_STATUS("loader API table query failed", status);
// Instantiate HSA timer
@@ -157,11 +159,93 @@ HsaRsrcFactory::~HsaRsrcFactory() {
for (auto p : cpu_list_) delete p;
for (auto p : gpu_list_) delete p;
if (initialize_hsa_) {
hsa_status_t status = hsa_shut_down();
hsa_status_t status = hsa_api_.hsa_shut_down();
CHECK_STATUS("Error in hsa_shut_down", status);
}
}
// Fills the static HSA entry-point table hsa_api_.
//
// @param table  HSA API table to copy the entry points from (its core_ and
//               amd_ext_ sub-tables are read), or NULL to bind hsa_api_ to the
//               HSA functions the library is linked against.
//
// The table is filled only while hsa_api_.hsa_init is still NULL, so the first
// call decides which set of entry points is used and later calls are no-ops.
// mutex_ is held for the duration of the call.
void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) {
  std::lock_guard<mutex_t> lck(mutex_);

  // Already populated by an earlier call: nothing to do.
  if (hsa_api_.hsa_init != NULL) return;

  if (table == NULL) {
    // No table supplied: use the directly linked HSA symbols.
    // Core runtime, agents and queues
    hsa_api_.hsa_init = hsa_init;
    hsa_api_.hsa_shut_down = hsa_shut_down;
    hsa_api_.hsa_agent_get_info = hsa_agent_get_info;
    hsa_api_.hsa_iterate_agents = hsa_iterate_agents;
    hsa_api_.hsa_queue_create = hsa_queue_create;
    hsa_api_.hsa_queue_destroy = hsa_queue_destroy;
    hsa_api_.hsa_queue_load_write_index_relaxed = hsa_queue_load_write_index_relaxed;
    hsa_api_.hsa_queue_store_write_index_relaxed = hsa_queue_store_write_index_relaxed;
    hsa_api_.hsa_queue_load_read_index_relaxed = hsa_queue_load_read_index_relaxed;
    // Signals
    hsa_api_.hsa_signal_create = hsa_signal_create;
    hsa_api_.hsa_signal_destroy = hsa_signal_destroy;
    hsa_api_.hsa_signal_load_relaxed = hsa_signal_load_relaxed;
    hsa_api_.hsa_signal_store_relaxed = hsa_signal_store_relaxed;
    hsa_api_.hsa_signal_store_screlease = hsa_signal_store_screlease;
    hsa_api_.hsa_signal_wait_scacquire = hsa_signal_wait_scacquire;
    // Extension tables, code objects and executables
    hsa_api_.hsa_system_get_major_extension_table = hsa_system_get_major_extension_table;
    hsa_api_.hsa_code_object_reader_create_from_file = hsa_code_object_reader_create_from_file;
    hsa_api_.hsa_executable_create_alt = hsa_executable_create_alt;
    hsa_api_.hsa_executable_load_agent_code_object = hsa_executable_load_agent_code_object;
    hsa_api_.hsa_executable_freeze = hsa_executable_freeze;
    hsa_api_.hsa_executable_get_symbol = hsa_executable_get_symbol;
    // AMD extension entry points
    hsa_api_.hsa_amd_agent_iterate_memory_pools = hsa_amd_agent_iterate_memory_pools;
    hsa_api_.hsa_amd_memory_pool_get_info = hsa_amd_memory_pool_get_info;
    hsa_api_.hsa_amd_memory_pool_allocate = hsa_amd_memory_pool_allocate;
    hsa_api_.hsa_amd_agents_allow_access = hsa_amd_agents_allow_access;
    hsa_api_.hsa_amd_memory_async_copy = hsa_amd_memory_async_copy;
    hsa_api_.hsa_amd_signal_async_handler = hsa_amd_signal_async_handler;
    hsa_api_.hsa_amd_profiling_get_async_copy_time = hsa_amd_profiling_get_async_copy_time;
    hsa_api_.hsa_amd_profiling_get_dispatch_time = hsa_amd_profiling_get_dispatch_time;
    return;
  }

  // A table was supplied: copy the entry points out of its core sub-table...
  const auto* core = table->core_;
  hsa_api_.hsa_init = core->hsa_init_fn;
  hsa_api_.hsa_shut_down = core->hsa_shut_down_fn;
  hsa_api_.hsa_agent_get_info = core->hsa_agent_get_info_fn;
  hsa_api_.hsa_iterate_agents = core->hsa_iterate_agents_fn;
  hsa_api_.hsa_queue_create = core->hsa_queue_create_fn;
  hsa_api_.hsa_queue_destroy = core->hsa_queue_destroy_fn;
  hsa_api_.hsa_queue_load_write_index_relaxed = core->hsa_queue_load_write_index_relaxed_fn;
  hsa_api_.hsa_queue_store_write_index_relaxed = core->hsa_queue_store_write_index_relaxed_fn;
  hsa_api_.hsa_queue_load_read_index_relaxed = core->hsa_queue_load_read_index_relaxed_fn;
  hsa_api_.hsa_signal_create = core->hsa_signal_create_fn;
  hsa_api_.hsa_signal_destroy = core->hsa_signal_destroy_fn;
  hsa_api_.hsa_signal_load_relaxed = core->hsa_signal_load_relaxed_fn;
  hsa_api_.hsa_signal_store_relaxed = core->hsa_signal_store_relaxed_fn;
  hsa_api_.hsa_signal_store_screlease = core->hsa_signal_store_screlease_fn;
  hsa_api_.hsa_signal_wait_scacquire = core->hsa_signal_wait_scacquire_fn;
  hsa_api_.hsa_system_get_major_extension_table = core->hsa_system_get_major_extension_table_fn;
  hsa_api_.hsa_code_object_reader_create_from_file = core->hsa_code_object_reader_create_from_file_fn;
  hsa_api_.hsa_executable_create_alt = core->hsa_executable_create_alt_fn;
  hsa_api_.hsa_executable_load_agent_code_object = core->hsa_executable_load_agent_code_object_fn;
  hsa_api_.hsa_executable_freeze = core->hsa_executable_freeze_fn;
  hsa_api_.hsa_executable_get_symbol = core->hsa_executable_get_symbol_fn;

  // ...and out of its AMD extension sub-table.
  const auto* amd_ext = table->amd_ext_;
  hsa_api_.hsa_amd_agent_iterate_memory_pools = amd_ext->hsa_amd_agent_iterate_memory_pools_fn;
  hsa_api_.hsa_amd_memory_pool_get_info = amd_ext->hsa_amd_memory_pool_get_info_fn;
  hsa_api_.hsa_amd_memory_pool_allocate = amd_ext->hsa_amd_memory_pool_allocate_fn;
  hsa_api_.hsa_amd_agents_allow_access = amd_ext->hsa_amd_agents_allow_access_fn;
  hsa_api_.hsa_amd_memory_async_copy = amd_ext->hsa_amd_memory_async_copy_fn;
  hsa_api_.hsa_amd_signal_async_handler = amd_ext->hsa_amd_signal_async_handler_fn;
  hsa_api_.hsa_amd_profiling_get_async_copy_time = amd_ext->hsa_amd_profiling_get_async_copy_time_fn;
  hsa_api_.hsa_amd_profiling_get_dispatch_time = amd_ext->hsa_amd_profiling_get_dispatch_time_fn;
}
hsa_status_t HsaRsrcFactory::LoadAqlProfileLib(aqlprofile_pfn_t* api) {
void* handle = dlopen(kAqlProfileLib, RTLD_NOW);
if (handle == NULL) {
@@ -203,7 +287,7 @@ const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) {
AgentInfo* agent_info = NULL;
hsa_device_type_t type;
status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type);
status = hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type);
CHECK_STATUS("Error Calling hsa_agent_get_info", status);
if (type == HSA_DEVICE_TYPE_CPU) {
@@ -212,9 +296,9 @@ const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) {
agent_info->dev_type = HSA_DEVICE_TYPE_CPU;
agent_info->dev_index = cpu_list_.size();
status = hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->cpu_pool);
status = hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->cpu_pool);
if ((status == HSA_STATUS_INFO_BREAK) && (cpu_pool_ == NULL)) cpu_pool_ = &agent_info->cpu_pool;
status = hsa_amd_agent_iterate_memory_pools(agent, FindKernArgPool, &agent_info->kern_arg_pool);
status = hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindKernArgPool, &agent_info->kern_arg_pool);
if ((status == HSA_STATUS_INFO_BREAK) && (kern_arg_pool_ == NULL)) kern_arg_pool_ = &agent_info->kern_arg_pool;
agent_info->gpu_pool = {};
@@ -226,28 +310,28 @@ const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) {
agent_info = new AgentInfo{};
agent_info->dev_id = agent;
agent_info->dev_type = HSA_DEVICE_TYPE_GPU;
hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_info->name);
hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_info->name);
strncpy(agent_info->gfxip, agent_info->name, 4);
agent_info->gfxip[4] = '\0';
hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &agent_info->max_wave_size);
hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &agent_info->max_queue_size);
hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_info->profile);
hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &agent_info->max_wave_size);
hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &agent_info->max_queue_size);
hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_info->profile);
agent_info->is_apu = (agent_info->profile == HSA_PROFILE_FULL) ? true : false;
hsa_agent_get_info(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT),
hsa_api_.hsa_agent_get_info(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT),
&agent_info->cu_num);
hsa_agent_get_info(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU),
hsa_api_.hsa_agent_get_info(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU),
&agent_info->waves_per_cu);
hsa_agent_get_info(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU),
hsa_api_.hsa_agent_get_info(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU),
&agent_info->simds_per_cu);
hsa_agent_get_info(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES),
hsa_api_.hsa_agent_get_info(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES),
&agent_info->se_num);
hsa_agent_get_info(agent,
hsa_api_.hsa_agent_get_info(agent,
static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE),
&agent_info->shader_arrays_per_se);
agent_info->cpu_pool = {};
agent_info->kern_arg_pool = {};
status = hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->gpu_pool);
status = hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->gpu_pool);
CHECK_ITER_STATUS("hsa_amd_agent_iterate_memory_pools(gpu pool)", status);
// Set GPU index
@@ -338,7 +422,7 @@ bool HsaRsrcFactory::GetCpuAgentInfo(uint32_t idx, const AgentInfo** agent_info)
bool HsaRsrcFactory::CreateQueue(const AgentInfo* agent_info, uint32_t num_pkts,
hsa_queue_t** queue) {
hsa_status_t status;
status = hsa_queue_create(agent_info->dev_id, num_pkts, HSA_QUEUE_TYPE_MULTI, NULL, NULL,
status = hsa_api_.hsa_queue_create(agent_info->dev_id, num_pkts, HSA_QUEUE_TYPE_MULTI, NULL, NULL,
UINT32_MAX, UINT32_MAX, queue);
return (status == HSA_STATUS_SUCCESS);
}
@@ -349,7 +433,7 @@ bool HsaRsrcFactory::CreateQueue(const AgentInfo* agent_info, uint32_t num_pkts,
// @return bool true if successful, false otherwise
bool HsaRsrcFactory::CreateSignal(uint32_t value, hsa_signal_t* signal) {
hsa_status_t status;
status = hsa_signal_create(value, 0, NULL, signal);
status = hsa_api_.hsa_signal_create(value, 0, NULL, signal);
return (status == HSA_STATUS_SUCCESS);
}
@@ -362,7 +446,7 @@ uint8_t* HsaRsrcFactory::AllocateLocalMemory(const AgentInfo* agent_info, size_t
hsa_status_t status = HSA_STATUS_ERROR;
uint8_t* buffer = NULL;
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
status = hsa_amd_memory_pool_allocate(agent_info->gpu_pool, size, 0, reinterpret_cast<void**>(&buffer));
status = hsa_api_.hsa_amd_memory_pool_allocate(agent_info->gpu_pool, size, 0, reinterpret_cast<void**>(&buffer));
uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
return ptr;
}
@@ -377,11 +461,11 @@ uint8_t* HsaRsrcFactory::AllocateKernArgMemory(const AgentInfo* agent_info, size
uint8_t* buffer = NULL;
if (!cpu_agents_.empty()) {
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
status = hsa_amd_memory_pool_allocate(*kern_arg_pool_, size, 0, reinterpret_cast<void**>(&buffer));
status = hsa_api_.hsa_amd_memory_pool_allocate(*kern_arg_pool_, size, 0, reinterpret_cast<void**>(&buffer));
// Both the CPU and GPU can access the kernel arguments
if (status == HSA_STATUS_SUCCESS) {
hsa_agent_t ag_list[1] = {agent_info->dev_id};
status = hsa_amd_agents_allow_access(1, ag_list, NULL, buffer);
status = hsa_api_.hsa_amd_agents_allow_access(1, ag_list, NULL, buffer);
}
}
uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
@@ -397,11 +481,11 @@ uint8_t* HsaRsrcFactory::AllocateSysMemory(const AgentInfo* agent_info, size_t s
uint8_t* buffer = NULL;
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
if (!cpu_agents_.empty()) {
status = hsa_amd_memory_pool_allocate(*cpu_pool_, size, 0, reinterpret_cast<void**>(&buffer));
status = hsa_api_.hsa_amd_memory_pool_allocate(*cpu_pool_, size, 0, reinterpret_cast<void**>(&buffer));
// Both the CPU and GPU can access the memory
if (status == HSA_STATUS_SUCCESS) {
hsa_agent_t ag_list[1] = {agent_info->dev_id};
status = hsa_amd_agents_allow_access(1, ag_list, NULL, buffer);
status = hsa_api_.hsa_amd_agents_allow_access(1, ag_list, NULL, buffer);
}
}
uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
@@ -425,12 +509,12 @@ uint8_t* HsaRsrcFactory::AllocateCmdMemory(const AgentInfo* agent_info, size_t s
// Blocks until the signal's value is observed to be 0.
//
// Each iteration performs one blocking wait (bounded by timeout_) for the
// condition "value < 1" and inspects the value the wait returned:
//   0     - done, return;
//   1     - the signal has not completed yet; a warning is logged and the
//           wait is repeated;
//   other - unexpected value, reported through EXC_RAISING.
void HsaRsrcFactory::SignalWait(const hsa_signal_t& signal) const {
  while (1) {
    const hsa_signal_value_t signal_value =
        hsa_api_.hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, 1, timeout_, HSA_WAIT_STATE_BLOCKED);
    if (signal_value == 0) break;

    // Braces keep the if/else well-formed whatever the logging macros expand to.
    if (signal_value == 1) {
      WARN_LOGGING("signal waiting...");
    } else {
      EXC_RAISING(HSA_STATUS_ERROR, "hsa_signal_wait_scacquire (" << signal_value << ")");
    }
  }
}
@@ -438,7 +522,7 @@ void HsaRsrcFactory::SignalWait(const hsa_signal_t& signal) const {
// Wait signal with signal value restore
// Waits for the signal via SignalWait(), then writes signal_value back into it
// with a single relaxed store issued through the hsa_api_ table.
void HsaRsrcFactory::SignalWaitRestore(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const {
  SignalWait(signal);
  // const_cast: the caller passes a const reference, but the store call is
  // given a non-const hsa_signal_t.
  hsa_api_.hsa_signal_store_relaxed(const_cast<hsa_signal_t&>(signal), signal_value);
}
// Copy data from GPU to host memory
@@ -446,12 +530,12 @@ bool HsaRsrcFactory::Memcpy(const hsa_agent_t& agent, void* dst, const void* src
hsa_status_t status = HSA_STATUS_ERROR;
if (!cpu_agents_.empty()) {
hsa_signal_t s = {};
status = hsa_signal_create(1, 0, NULL, &s);
status = hsa_api_.hsa_signal_create(1, 0, NULL, &s);
CHECK_STATUS("hsa_signal_create()", status);
status = hsa_amd_memory_async_copy(dst, cpu_agents_[0], src, agent, size, 0, NULL, s);
status = hsa_api_.hsa_amd_memory_async_copy(dst, cpu_agents_[0], src, agent, size, 0, NULL, s);
CHECK_STATUS("hsa_amd_memory_async_copy()", status);
SignalWait(s);
status = hsa_signal_destroy(s);
status = hsa_api_.hsa_signal_destroy(s);
CHECK_STATUS("hsa_signal_destroy()", status);
}
return (status == HSA_STATUS_SUCCESS);
@@ -493,29 +577,29 @@ bool HsaRsrcFactory::LoadAndFinalize(const AgentInfo* agent_info, const char* br
// Create code object reader
hsa_code_object_reader_t code_obj_rdr = {0};
status = hsa_code_object_reader_create_from_file(file_handle, &code_obj_rdr);
status = hsa_api_.hsa_code_object_reader_create_from_file(file_handle, &code_obj_rdr);
if (status != HSA_STATUS_SUCCESS) {
std::cerr << "Failed to create code object reader '" << filename << "'" << std::endl;
return false;
}
// Create executable.
status = hsa_executable_create_alt(HSA_PROFILE_FULL, HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
status = hsa_api_.hsa_executable_create_alt(HSA_PROFILE_FULL, HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
NULL, executable);
CHECK_STATUS("Error in creating executable object", status);
// Load code object.
status = hsa_executable_load_agent_code_object(*executable, agent_info->dev_id, code_obj_rdr,
status = hsa_api_.hsa_executable_load_agent_code_object(*executable, agent_info->dev_id, code_obj_rdr,
NULL, NULL);
CHECK_STATUS("Error in loading executable object", status);
// Freeze executable.
status = hsa_executable_freeze(*executable, "");
status = hsa_api_.hsa_executable_freeze(*executable, "");
CHECK_STATUS("Error in freezing executable object", status);
// Get symbol handle.
hsa_executable_symbol_t kernelSymbol;
status = hsa_executable_get_symbol(*executable, NULL, kernel_name, agent_info->dev_id, 0,
status = hsa_api_.hsa_executable_get_symbol(*executable, NULL, kernel_name, agent_info->dev_id, 0,
&kernelSymbol);
CHECK_STATUS("Error in looking up kernel symbol", status);
@@ -553,9 +637,9 @@ uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet) {
const uint32_t slot_size_b = CMD_SLOT_SIZE_B;
// adevance command queue
const uint64_t write_idx = hsa_queue_load_write_index_relaxed(queue);
hsa_queue_store_write_index_relaxed(queue, write_idx + 1);
while ((write_idx - hsa_queue_load_read_index_relaxed(queue)) >= queue->size) {
const uint64_t write_idx = hsa_api_.hsa_queue_load_write_index_relaxed(queue);
hsa_api_.hsa_queue_store_write_index_relaxed(queue, write_idx + 1);
while ((write_idx - hsa_api_.hsa_queue_load_read_index_relaxed(queue)) >= queue->size) {
sched_yield();
}
@@ -572,7 +656,7 @@ uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet) {
header_atomic_ptr->store(slot_data[0], std::memory_order_release);
// ringdoor bell
hsa_signal_store_relaxed(queue->doorbell_signal, write_idx);
hsa_api_.hsa_signal_store_relaxed(queue->doorbell_signal, write_idx);
return write_idx;
}
@@ -594,8 +678,9 @@ uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet, size_t s
return write_idx;
}
// Storage for HsaRsrcFactory's static members.
// instance_ is defined exactly once, as the atomic singleton pointer that
// Create()/Instance() access through load()/store(); it starts out null.
std::atomic<HsaRsrcFactory*> HsaRsrcFactory::instance_{};
HsaRsrcFactory::mutex_t HsaRsrcFactory::mutex_;
HsaRsrcFactory::timestamp_t HsaRsrcFactory::timeout_ns_ = HsaTimer::TIMESTAMP_MAX;
// All entry points start out null; InitHsaApiTable() fills them in.
hsa_pfn_t HsaRsrcFactory::hsa_api_{};
} // namespace util
+58 -8
Просмотреть файл
@@ -26,6 +26,7 @@ POSSIBILITY OF SUCH DAMAGE.
#define SRC_UTIL_HSA_RSRC_FACTORY_H_
#include <hsa.h>
#include <hsa_api_trace.h>
#include <hsa_ext_amd.h>
#include <hsa_ext_finalize.h>
#include <hsa_ven_amd_aqlprofile.h>
@@ -35,6 +36,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include <stdlib.h>
#include <string.h>
#include <atomic>
#include <iostream>
#include <mutex>
#include <map>
@@ -68,6 +70,44 @@ static const size_t MEM_PAGE_BYTES = 0x1000;
static const size_t MEM_PAGE_MASK = MEM_PAGE_BYTES - 1;
typedef decltype(hsa_agent_t::handle) hsa_agent_handle_t;
// Table of pointers to the HSA runtime functions used by HsaRsrcFactory.
// Each member has the same name as the HSA function it points to and the
// matching pointer type (derived with decltype from that function's
// declaration), so a call site can be switched from `hsa_xxx(...)` to
// `table.hsa_xxx(...)` without any other change.
struct hsa_pfn_t {
// Runtime init/shutdown and agent discovery
decltype(hsa_init)* hsa_init;
decltype(hsa_shut_down)* hsa_shut_down;
decltype(hsa_agent_get_info)* hsa_agent_get_info;
decltype(hsa_iterate_agents)* hsa_iterate_agents;
// Queues
decltype(hsa_queue_create)* hsa_queue_create;
decltype(hsa_queue_destroy)* hsa_queue_destroy;
decltype(hsa_queue_load_write_index_relaxed)* hsa_queue_load_write_index_relaxed;
decltype(hsa_queue_store_write_index_relaxed)* hsa_queue_store_write_index_relaxed;
decltype(hsa_queue_load_read_index_relaxed)* hsa_queue_load_read_index_relaxed;
// Signals
decltype(hsa_signal_create)* hsa_signal_create;
decltype(hsa_signal_destroy)* hsa_signal_destroy;
decltype(hsa_signal_store_relaxed)* hsa_signal_store_relaxed;
decltype(hsa_signal_wait_scacquire)* hsa_signal_wait_scacquire;
// AMD extension: memory pools, access control and async copy
decltype(hsa_amd_agent_iterate_memory_pools)* hsa_amd_agent_iterate_memory_pools;
decltype(hsa_amd_memory_pool_get_info)* hsa_amd_memory_pool_get_info;
decltype(hsa_amd_memory_pool_allocate)* hsa_amd_memory_pool_allocate;
decltype(hsa_amd_agents_allow_access)* hsa_amd_agents_allow_access;
decltype(hsa_amd_memory_async_copy)* hsa_amd_memory_async_copy;
// Extension table query, code objects and executables
decltype(hsa_system_get_major_extension_table)* hsa_system_get_major_extension_table;
decltype(hsa_code_object_reader_create_from_file)* hsa_code_object_reader_create_from_file;
decltype(hsa_executable_create_alt)* hsa_executable_create_alt;
decltype(hsa_executable_load_agent_code_object)* hsa_executable_load_agent_code_object;
decltype(hsa_executable_freeze)* hsa_executable_freeze;
decltype(hsa_executable_get_symbol)* hsa_executable_get_symbol;
// AMD extension: async signal handler and profiling timestamps
decltype(hsa_amd_signal_async_handler)* hsa_amd_signal_async_handler;
decltype(hsa_amd_profiling_get_async_copy_time)* hsa_amd_profiling_get_async_copy_time;
decltype(hsa_amd_profiling_get_dispatch_time)* hsa_amd_profiling_get_dispatch_time;
// Additional signal accessors
decltype(hsa_signal_load_relaxed)* hsa_signal_load_relaxed;
decltype(hsa_signal_store_screlease)* hsa_signal_store_screlease;
};
// Encapsulates information about a Hsa Agent such as its
// handle, name, max queue size, max wavefront size, etc.
struct AgentInfo {
@@ -163,17 +203,20 @@ class HsaRsrcFactory {
// Returns the HsaRsrcFactory singleton, constructing it on first use.
//
// @param initialize_hsa  forwarded to the constructor when the instance is
//                        created; ignored if an instance already exists.
//
// mutex_ is held for the whole call, so at most one thread runs the
// construction branch at a time.
static HsaRsrcFactory* Create(bool initialize_hsa = true) {
  std::lock_guard<mutex_t> lck(mutex_);
  // The store below is made while holding mutex_, so a relaxed load is
  // sufficient to observe it here.
  HsaRsrcFactory* obj = instance_.load(std::memory_order_relaxed);
  if (obj == NULL) {
    obj = new HsaRsrcFactory(initialize_hsa);
    // Publish the fully constructed object to lock-free readers.
    instance_.store(obj, std::memory_order_release);
  }
  return obj;
}
// Returns a reference to the HsaRsrcFactory singleton.
// The common case is a single acquire load with no locking. If no instance
// has been published yet, one is created with initialize_hsa == false.
// A null result is reported through CHECK_STATUS.
static HsaRsrcFactory& Instance() {
  HsaRsrcFactory* obj = instance_.load(std::memory_order_acquire);
  if (obj == NULL) obj = Create(false);
  hsa_status_t status = (obj != NULL) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
  CHECK_STATUS("HsaRsrcFactory::Instance() failed", status);
  return *obj;
}
static void Destroy() {
@@ -275,6 +318,10 @@ class HsaRsrcFactory {
static uint64_t Submit(hsa_queue_t* queue, const void* packet);
static uint64_t Submit(hsa_queue_t* queue, const void* packet, size_t size_bytes);
// Initialize HSA API table
void static InitHsaApiTable(HsaApiTable* table);
static const hsa_pfn_t* HsaApi() { return &hsa_api_; }
// Return AqlProfile API table
typedef hsa_ven_amd_aqlprofile_pfn_t aqlprofile_pfn_t;
const aqlprofile_pfn_t* AqlProfileApi() const { return &aqlprofile_api_; }
@@ -292,7 +339,7 @@ class HsaRsrcFactory {
static void SetTimeoutNs(const timestamp_t& time) {
std::lock_guard<mutex_t> lck(mutex_);
timeout_ns_ = time;
if (instance_ != NULL) instance_->timeout_ = instance_->timer_->ns_to_sysclock(time);
if (instance_ != NULL) Instance().timeout_ = Instance().timer_->ns_to_sysclock(time);
}
private:
@@ -321,7 +368,7 @@ class HsaRsrcFactory {
// HSA was initialized
const bool initialize_hsa_;
// Singleton pointer; atomic so Instance() can read it without taking mutex_.
static std::atomic<HsaRsrcFactory*> instance_;
// Mutex taken by Create(), SetTimeoutNs() and InitHsaApiTable().
static mutex_t mutex_;
// Used to maintain a list of Hsa Gpu Agent Info
@@ -335,6 +382,9 @@ class HsaRsrcFactory {
// System agents map
std::map<hsa_agent_handle_t, const AgentInfo*> agent_map_;
// HSA runtime API table
static hsa_pfn_t hsa_api_;
// AqlProfile API table
aqlprofile_pfn_t aqlprofile_api_;
+22 -13
Просмотреть файл
@@ -75,8 +75,16 @@ class Logger {
// Returns the Logger singleton, constructing it on first use.
// mutex_ is held for the whole call, so at most one thread runs the
// construction branch at a time.
static Logger* Create() {
  std::lock_guard<mutex_t> lck(mutex_);
  // The store below is made while holding mutex_, so a relaxed load is
  // sufficient to observe it here.
  Logger* obj = instance_.load(std::memory_order_relaxed);
  if (obj == NULL) {
    obj = new Logger();
    // The logger itself is unavailable on this path, so the failure is
    // written straight to stderr before aborting.
    // NOTE(review): a plain new-expression normally throws instead of
    // returning null; this check only matters for a non-throwing allocator.
    if (obj == NULL) {
      std::cerr << "ROCTracer: log object creation failed" << std::endl << std::flush;
      abort();
    }
    // Publish the fully constructed object to lock-free readers.
    instance_.store(obj, std::memory_order_release);
  }
  return obj;
}
static void Destroy() {
@@ -86,8 +94,9 @@ class Logger {
}
// Returns a reference to the Logger singleton.
// The common case is a single acquire load with no locking; Create() is only
// called (and the mutex only taken) when no instance has been published yet.
static Logger& Instance() {
  Logger* obj = instance_.load(std::memory_order_acquire);
  if (obj == NULL) obj = Create();
  return *obj;
}
private:
@@ -152,7 +161,7 @@ class Logger {
bool messaging_;
// Mutex taken by Create() while the singleton is constructed.
static mutex_t mutex_;
// Singleton pointer; atomic so Instance() can read it without taking mutex_.
static std::atomic<Logger*> instance_;
std::map<uint32_t, std::string> message_;
};
@@ -160,32 +169,32 @@ class Logger {
} // namespace roctracer
// Logging helper macros.
//
// `stream` is a chain of `<<`-insertable operands, e.g.
//   ERR_LOGGING("bad value " << v);
// Each macro expands to a single `do { ... } while(0)` statement, so it must
// be followed by a semicolon and can be used as the unbraced body of an
// if/else without changing how the else binds.

// Writes an "error: "-prefixed message to the logger.
#define ERR_LOGGING(stream)                                                                        \
  do {                                                                                             \
    roctracer::util::Logger::Instance() << "error: " << roctracer::util::Logger::begm              \
                                        << stream << roctracer::util::Logger::endl;                \
  } while(0)

// Writes an "info: "-prefixed message to the logger.
#define INFO_LOGGING(stream)                                                                       \
  do {                                                                                             \
    roctracer::util::Logger::Instance() << "info: " << roctracer::util::Logger::begm << stream     \
                                        << roctracer::util::Logger::endl;                          \
  } while(0)

// Echoes the message to stderr and writes a "warning: "-prefixed message to
// the logger.
#define WARN_LOGGING(stream)                                                                       \
  do {                                                                                             \
    std::cerr << "ROCProfiler: " << stream << std::endl;                                           \
    roctracer::util::Logger::Instance() << "warning: " << roctracer::util::Logger::begm << stream  \
                                        << roctracer::util::Logger::endl;                          \
  } while(0)

#ifdef DEBUG
// Debug builds only: writes the quoted message followed by the function, file
// and line of the call site.
#define DBG_LOGGING(stream)                                                                        \
  do {                                                                                             \
    roctracer::util::Logger::Instance() << roctracer::util::Logger::begm << "debug: \""            \
                                        << stream << "\""                                          \
                                        << " in " << __FUNCTION__ << " at " << __FILE__            \
                                        << " line " << __LINE__                                    \
                                        << roctracer::util::Logger::endl;                          \
  } while(0)
#endif
#endif // SRC_UTIL_LOGGER_H_
+1 -1
Просмотреть файл
@@ -49,7 +49,7 @@ target_link_libraries ( ${TEST_LIB} ${ROCTRACER_TARGET} ${HSA_RUNTIME_LIB} c std
## Build HSA test
## Fetch the hsa-class test sources once (skipped if the directory already exists),
## then pin the working tree to a single known revision.
execute_process ( COMMAND sh -xc "if [ ! -e ${TEST_DIR}/hsa ] ; then git clone https://github.com/ROCmSoftwarePlatform/hsa-class.git ${TEST_DIR}/hsa; fi" )
execute_process ( COMMAND sh -xc "if [ -e ${TEST_DIR}/hsa ] ; then cd ${TEST_DIR}/hsa && git checkout 5b0d1af; fi" )
set ( TEST_DIR ${HSA_TEST_DIR} )
add_subdirectory ( ${TEST_DIR} ${PROJECT_BINARY_DIR}/test/hsa )