2016-03-22 18:10:13 -05:00
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
//
|
|
|
|
|
// The University of Illinois/NCSA
|
|
|
|
|
// Open Source License (NCSA)
|
|
|
|
|
//
|
|
|
|
|
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
|
|
|
|
//
|
|
|
|
|
// Developed by:
|
|
|
|
|
//
|
|
|
|
|
// AMD Research and AMD HSA Software Development
|
|
|
|
|
//
|
|
|
|
|
// Advanced Micro Devices, Inc.
|
|
|
|
|
//
|
|
|
|
|
// www.amd.com
|
|
|
|
|
//
|
|
|
|
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
|
|
|
// of this software and associated documentation files (the "Software"), to
|
|
|
|
|
// deal with the Software without restriction, including without limitation
|
|
|
|
|
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
|
|
|
// and/or sell copies of the Software, and to permit persons to whom the
|
|
|
|
|
// Software is furnished to do so, subject to the following conditions:
|
|
|
|
|
//
|
|
|
|
|
// - Redistributions of source code must retain the above copyright notice,
|
|
|
|
|
// this list of conditions and the following disclaimers.
|
|
|
|
|
// - Redistributions in binary form must reproduce the above copyright
|
|
|
|
|
// notice, this list of conditions and the following disclaimers in
|
|
|
|
|
// the documentation and/or other materials provided with the distribution.
|
|
|
|
|
// - Neither the names of Advanced Micro Devices, Inc,
|
|
|
|
|
// nor the names of its contributors may be used to endorse or promote
|
|
|
|
|
// products derived from this Software without specific prior written
|
|
|
|
|
// permission.
|
|
|
|
|
//
|
|
|
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
|
|
|
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
|
|
|
|
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
|
|
|
|
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
|
|
|
// DEALINGS WITH THE SOFTWARE.
|
|
|
|
|
//
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
|
|
#include "core/inc/runtime.h"
|
|
|
|
|
|
|
|
|
|
#include <algorithm>
|
|
|
|
|
#include <atomic>
|
|
|
|
|
#include <cstring>
|
|
|
|
|
#include <string>
|
|
|
|
|
#include <thread>
|
|
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
|
|
#include "core/common/shared.h"
|
|
|
|
|
#include "core/inc/hsa_ext_interface.h"
|
|
|
|
|
#include "core/inc/amd_cpu_agent.h"
|
|
|
|
|
#include "core/inc/amd_gpu_agent.h"
|
|
|
|
|
#include "core/inc/amd_memory_region.h"
|
|
|
|
|
#include "core/inc/amd_topology.h"
|
|
|
|
|
#include "core/inc/signal.h"
|
|
|
|
|
#include "core/inc/interrupt_signal.h"
|
2016-06-10 17:31:13 -05:00
|
|
|
#include "core/inc/hsa_ext_amd_impl.h"
|
2016-03-22 18:10:13 -05:00
|
|
|
#include "core/inc/hsa_api_trace_int.h"
|
2017-11-10 16:37:41 -06:00
|
|
|
#include "core/util/os.h"
|
|
|
|
|
#include "inc/hsa_ven_amd_aqlprofile.h"
|
2016-03-22 18:10:13 -05:00
|
|
|
|
|
|
|
|
#define HSA_VERSION_MAJOR 1
|
2016-08-22 01:40:26 -04:00
|
|
|
#define HSA_VERSION_MINOR 1
|
2016-03-22 18:10:13 -05:00
|
|
|
|
2017-03-17 00:29:59 -05:00
|
|
|
const char rocrbuildid[] __attribute__((used)) = "ROCR BUILD ID: " STRING(ROCR_BUILD_ID);
|
2016-04-11 15:03:06 -05:00
|
|
|
|
2016-03-22 18:10:13 -05:00
|
|
|
namespace core {
|
|
|
|
|
bool g_use_interrupt_wait = true;
|
|
|
|
|
|
|
|
|
|
Runtime* Runtime::runtime_singleton_ = NULL;
|
|
|
|
|
|
|
|
|
|
KernelMutex Runtime::bootstrap_lock_;
|
|
|
|
|
|
|
|
|
|
static bool loaded = true;
|
|
|
|
|
|
|
|
|
|
class RuntimeCleanup {
|
|
|
|
|
public:
|
|
|
|
|
~RuntimeCleanup() {
|
|
|
|
|
if (!Runtime::IsOpen()) {
|
|
|
|
|
delete Runtime::runtime_singleton_;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
loaded = false;
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
static RuntimeCleanup cleanup_at_unload_;
|
|
|
|
|
|
2016-08-22 17:27:00 -05:00
|
|
|
hsa_status_t Runtime::Acquire() {
|
2016-03-22 18:10:13 -05:00
|
|
|
// Check to see if HSA has been cleaned up (process exit)
|
2016-08-22 17:27:00 -05:00
|
|
|
if (!loaded) return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
2016-03-22 18:10:13 -05:00
|
|
|
|
|
|
|
|
ScopedAcquire<KernelMutex> boot(&bootstrap_lock_);
|
|
|
|
|
|
|
|
|
|
if (runtime_singleton_ == NULL) {
|
|
|
|
|
runtime_singleton_ = new Runtime();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (runtime_singleton_->ref_count_ == INT32_MAX) {
|
2016-08-22 17:27:00 -05:00
|
|
|
return HSA_STATUS_ERROR_REFCOUNT_OVERFLOW;
|
2016-03-22 18:10:13 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
runtime_singleton_->ref_count_++;
|
2018-02-28 05:20:07 -06:00
|
|
|
MAKE_NAMED_SCOPE_GUARD(refGuard, [&]() { runtime_singleton_->ref_count_--; });
|
2016-03-22 18:10:13 -05:00
|
|
|
|
|
|
|
|
if (runtime_singleton_->ref_count_ == 1) {
|
2016-08-22 17:27:00 -05:00
|
|
|
hsa_status_t status = runtime_singleton_->Load();
|
|
|
|
|
|
|
|
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
|
|
|
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
|
|
|
|
}
|
2016-03-22 18:10:13 -05:00
|
|
|
}
|
|
|
|
|
|
2018-02-28 05:20:07 -06:00
|
|
|
refGuard.Dismiss();
|
2016-08-22 17:27:00 -05:00
|
|
|
return HSA_STATUS_SUCCESS;
|
2016-03-22 18:10:13 -05:00
|
|
|
}
|
|
|
|
|
|
2016-08-22 17:27:00 -05:00
|
|
|
hsa_status_t Runtime::Release() {
|
2018-10-22 19:15:22 -05:00
|
|
|
// Check to see if HSA has been cleaned up (process exit)
|
|
|
|
|
if (!loaded) return HSA_STATUS_SUCCESS;
|
|
|
|
|
|
|
|
|
|
ScopedAcquire<KernelMutex> boot(&bootstrap_lock_);
|
2016-03-22 18:10:13 -05:00
|
|
|
|
2018-10-22 19:15:22 -05:00
|
|
|
if (runtime_singleton_ == nullptr) return HSA_STATUS_ERROR_NOT_INITIALIZED;
|
|
|
|
|
|
|
|
|
|
if (runtime_singleton_->ref_count_ == 1) {
|
2016-03-22 18:10:13 -05:00
|
|
|
// Release all registered memory, then unload backends
|
2018-10-22 19:15:22 -05:00
|
|
|
runtime_singleton_->Unload();
|
2016-03-22 18:10:13 -05:00
|
|
|
}
|
|
|
|
|
|
2018-10-22 19:15:22 -05:00
|
|
|
runtime_singleton_->ref_count_--;
|
|
|
|
|
|
|
|
|
|
if (runtime_singleton_->ref_count_ == 0) {
|
|
|
|
|
delete runtime_singleton_;
|
|
|
|
|
runtime_singleton_ = nullptr;
|
|
|
|
|
}
|
2016-03-22 18:10:13 -05:00
|
|
|
|
2016-08-22 17:27:00 -05:00
|
|
|
return HSA_STATUS_SUCCESS;
|
2016-03-22 18:10:13 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool Runtime::IsOpen() {
|
|
|
|
|
return (Runtime::runtime_singleton_ != NULL) &&
|
|
|
|
|
(Runtime::runtime_singleton_->ref_count_ != 0);
|
|
|
|
|
}
|
|
|
|
|
|
2018-11-09 03:07:10 -06:00
|
|
|
// Register agent information only. Must not call anything that may use the registered information
|
|
|
|
|
// since those tables are incomplete.
|
2016-03-22 18:10:13 -05:00
|
|
|
void Runtime::RegisterAgent(Agent* agent) {
|
2016-11-09 22:21:24 -06:00
|
|
|
// Record the agent in the node-to-agent reverse lookup table.
|
|
|
|
|
agents_by_node_[agent->node_id()].push_back(agent);
|
|
|
|
|
|
|
|
|
|
// Process agent as a cpu or gpu device.
|
2016-03-22 18:10:13 -05:00
|
|
|
if (agent->device_type() == Agent::DeviceType::kAmdCpuDevice) {
|
|
|
|
|
cpu_agents_.push_back(agent);
|
|
|
|
|
|
|
|
|
|
// Add cpu regions to the system region list.
|
|
|
|
|
for (const core::MemoryRegion* region : agent->regions()) {
|
|
|
|
|
if (region->fine_grain()) {
|
|
|
|
|
system_regions_fine_.push_back(region);
|
|
|
|
|
} else {
|
|
|
|
|
system_regions_coarse_.push_back(region);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
assert(system_regions_fine_.size() > 0);
|
|
|
|
|
|
|
|
|
|
// Init default fine grain system region allocator using fine grain
|
|
|
|
|
// system region of the first discovered CPU agent.
|
|
|
|
|
if (cpu_agents_.size() == 1) {
|
2016-08-22 20:19:21 -05:00
|
|
|
// Might need memory pooling to cover allocation that
|
|
|
|
|
// requires less than 4096 bytes.
|
|
|
|
|
system_allocator_ =
|
|
|
|
|
[&](size_t size, size_t alignment,
|
|
|
|
|
MemoryRegion::AllocateFlags alloc_flags) -> void* {
|
|
|
|
|
assert(alignment <= 4096);
|
|
|
|
|
void* ptr = NULL;
|
|
|
|
|
return (HSA_STATUS_SUCCESS ==
|
|
|
|
|
core::Runtime::runtime_singleton_->AllocateMemory(
|
|
|
|
|
system_regions_fine_[0], size, alloc_flags, &ptr))
|
|
|
|
|
? ptr
|
|
|
|
|
: NULL;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
system_deallocator_ =
|
|
|
|
|
[](void* ptr) { core::Runtime::runtime_singleton_->FreeMemory(ptr); };
|
2016-03-22 18:10:13 -05:00
|
|
|
|
|
|
|
|
BaseShared::SetAllocateAndFree(system_allocator_, system_deallocator_);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Setup system clock frequency for the first time.
|
|
|
|
|
if (sys_clock_freq_ == 0) {
|
|
|
|
|
// Cache system clock frequency
|
|
|
|
|
HsaClockCounters clocks;
|
|
|
|
|
hsaKmtGetClockCounters(0, &clocks);
|
|
|
|
|
sys_clock_freq_ = clocks.SystemClockFrequencyHz;
|
|
|
|
|
}
|
|
|
|
|
} else if (agent->device_type() == Agent::DeviceType::kAmdGpuDevice) {
|
|
|
|
|
gpu_agents_.push_back(agent);
|
|
|
|
|
|
|
|
|
|
gpu_ids_.push_back(agent->node_id());
|
|
|
|
|
|
2018-11-09 04:36:25 -06:00
|
|
|
// Assign the first discovered gpu agent as region gpu.
|
|
|
|
|
if (region_gpu_ == NULL) region_gpu_ = agent;
|
2016-03-22 18:10:13 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void Runtime::DestroyAgents() {
|
2016-11-09 22:21:24 -06:00
|
|
|
agents_by_node_.clear();
|
|
|
|
|
|
2016-03-22 18:10:13 -05:00
|
|
|
std::for_each(gpu_agents_.begin(), gpu_agents_.end(), DeleteObject());
|
|
|
|
|
gpu_agents_.clear();
|
|
|
|
|
|
|
|
|
|
gpu_ids_.clear();
|
|
|
|
|
|
|
|
|
|
std::for_each(cpu_agents_.begin(), cpu_agents_.end(), DeleteObject());
|
|
|
|
|
cpu_agents_.clear();
|
|
|
|
|
|
2018-11-09 04:36:25 -06:00
|
|
|
region_gpu_ = NULL;
|
2016-03-22 18:10:13 -05:00
|
|
|
|
|
|
|
|
system_regions_fine_.clear();
|
|
|
|
|
system_regions_coarse_.clear();
|
|
|
|
|
}
|
|
|
|
|
|
2018-08-28 16:20:03 -05:00
|
|
|
void Runtime::SetLinkCount(size_t num_nodes) {
|
2018-09-18 16:27:30 -05:00
|
|
|
num_nodes_ = num_nodes;
|
2018-08-28 16:20:03 -05:00
|
|
|
link_matrix_.resize(num_nodes * num_nodes);
|
2016-03-22 18:10:13 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void Runtime::RegisterLinkInfo(uint32_t node_id_from, uint32_t node_id_to,
|
|
|
|
|
uint32_t num_hop,
|
|
|
|
|
hsa_amd_memory_pool_link_info_t& link_info) {
|
|
|
|
|
const uint32_t idx = GetIndexLinkInfo(node_id_from, node_id_to);
|
|
|
|
|
link_matrix_[idx].num_hop = num_hop;
|
|
|
|
|
link_matrix_[idx].info = link_info;
|
2016-05-02 16:27:11 -05:00
|
|
|
|
|
|
|
|
// Limit the number of hop to 1 since the runtime does not have enough
|
|
|
|
|
// information to share to the user about each hop.
|
|
|
|
|
link_matrix_[idx].num_hop = std::min(link_matrix_[idx].num_hop , 1U);
|
2016-03-22 18:10:13 -05:00
|
|
|
}
|
|
|
|
|
|
2016-04-13 12:39:25 -05:00
|
|
|
const Runtime::LinkInfo Runtime::GetLinkInfo(uint32_t node_id_from,
|
|
|
|
|
uint32_t node_id_to) {
|
|
|
|
|
return (node_id_from != node_id_to)
|
|
|
|
|
? link_matrix_[GetIndexLinkInfo(node_id_from, node_id_to)]
|
|
|
|
|
: LinkInfo(); // No link.
|
2016-03-22 18:10:13 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint32_t Runtime::GetIndexLinkInfo(uint32_t node_id_from, uint32_t node_id_to) {
|
2018-09-18 16:27:30 -05:00
|
|
|
return ((node_id_from * num_nodes_) + node_id_to);
|
2016-03-22 18:10:13 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
hsa_status_t Runtime::IterateAgent(hsa_status_t (*callback)(hsa_agent_t agent,
|
|
|
|
|
void* data),
|
|
|
|
|
void* data) {
|
2017-10-21 10:35:27 -05:00
|
|
|
AMD::callback_t<decltype(callback)> call(callback);
|
2016-03-22 18:10:13 -05:00
|
|
|
|
|
|
|
|
std::vector<core::Agent*>* agent_lists[2] = {&cpu_agents_, &gpu_agents_};
|
|
|
|
|
for (std::vector<core::Agent*>* agent_list : agent_lists) {
|
|
|
|
|
for (size_t i = 0; i < agent_list->size(); ++i) {
|
|
|
|
|
hsa_agent_t agent = Agent::Convert(agent_list->at(i));
|
2017-10-21 10:35:27 -05:00
|
|
|
hsa_status_t status = call(agent, data);
|
2016-03-22 18:10:13 -05:00
|
|
|
|
|
|
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
|
|
|
return status;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return HSA_STATUS_SUCCESS;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
hsa_status_t Runtime::AllocateMemory(const MemoryRegion* region, size_t size,
|
2016-08-22 20:19:21 -05:00
|
|
|
MemoryRegion::AllocateFlags alloc_flags,
|
2016-03-22 18:10:13 -05:00
|
|
|
void** address) {
|
2016-11-09 22:21:24 -06:00
|
|
|
ScopedAcquire<KernelMutex> lock(&memory_lock_);
|
2016-08-22 20:19:21 -05:00
|
|
|
hsa_status_t status = region->Allocate(size, alloc_flags, address);
|
2016-03-22 18:10:13 -05:00
|
|
|
|
|
|
|
|
// Track the allocation result so that it could be freed properly.
|
|
|
|
|
if (status == HSA_STATUS_SUCCESS) {
|
|
|
|
|
allocation_map_[*address] = AllocationRegion(region, size);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return status;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
hsa_status_t Runtime::FreeMemory(void* ptr) {
|
2017-09-11 20:31:20 -05:00
|
|
|
if (ptr == nullptr) {
|
2016-03-22 18:10:13 -05:00
|
|
|
return HSA_STATUS_SUCCESS;
|
|
|
|
|
}
|
|
|
|
|
|
2017-09-11 20:31:20 -05:00
|
|
|
const MemoryRegion* region = nullptr;
|
2016-03-22 18:10:13 -05:00
|
|
|
size_t size = 0;
|
2019-06-07 13:04:12 -05:00
|
|
|
std::unique_ptr<std::vector<AllocationRegion::notifier_t>> notifiers;
|
2016-03-22 18:10:13 -05:00
|
|
|
|
2019-06-07 13:04:12 -05:00
|
|
|
{
|
|
|
|
|
ScopedAcquire<KernelMutex> lock(&memory_lock_);
|
2016-03-22 18:10:13 -05:00
|
|
|
|
2019-06-07 13:04:12 -05:00
|
|
|
std::map<const void*, AllocationRegion>::iterator it = allocation_map_.find(ptr);
|
|
|
|
|
|
|
|
|
|
if (it == allocation_map_.end()) {
|
|
|
|
|
debug_warning(false && "Can't find address in allocation map");
|
|
|
|
|
return HSA_STATUS_ERROR_INVALID_ALLOCATION;
|
|
|
|
|
}
|
|
|
|
|
region = it->second.region;
|
|
|
|
|
size = it->second.size;
|
|
|
|
|
|
|
|
|
|
// Imported fragments can't be released with FreeMemory.
|
|
|
|
|
if (region == nullptr) {
|
|
|
|
|
assert(false && "Can't release imported memory with free.");
|
|
|
|
|
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
|
|
|
|
}
|
2016-11-09 22:21:24 -06:00
|
|
|
|
2019-06-07 13:04:12 -05:00
|
|
|
notifiers = std::move(it->second.notifiers);
|
|
|
|
|
|
|
|
|
|
allocation_map_.erase(it);
|
|
|
|
|
|
|
|
|
|
// Fast path to avoid doubling lock ops in the common case (no notifiers).
|
|
|
|
|
if (!notifiers) return region->Free(ptr, size);
|
2017-09-27 18:44:54 -05:00
|
|
|
}
|
2017-09-11 20:31:20 -05:00
|
|
|
|
2019-06-07 13:04:12 -05:00
|
|
|
// Notifiers can't run while holding the lock or the callback won't be able to manage memory.
|
|
|
|
|
// The memory triggering the notification has already been removed from the memory map so can't
|
|
|
|
|
// be double released during the callback.
|
|
|
|
|
for (auto& notifier : *notifiers) {
|
|
|
|
|
notifier.callback(notifier.ptr, notifier.user_data);
|
|
|
|
|
}
|
2016-03-22 18:10:13 -05:00
|
|
|
|
2019-06-07 13:04:12 -05:00
|
|
|
// Fragment allocator requires protection.
|
|
|
|
|
ScopedAcquire<KernelMutex> lock(&memory_lock_);
|
2016-03-22 18:10:13 -05:00
|
|
|
return region->Free(ptr, size);
|
|
|
|
|
}
|
|
|
|
|
|
2019-06-07 13:04:12 -05:00
|
|
|
hsa_status_t Runtime::RegisterReleaseNotifier(void* ptr, hsa_amd_deallocation_callback_t callback,
|
|
|
|
|
void* user_data) {
|
|
|
|
|
ScopedAcquire<KernelMutex> lock(&memory_lock_);
|
|
|
|
|
auto mem = allocation_map_.upper_bound(ptr);
|
|
|
|
|
if (mem != allocation_map_.begin()) {
|
|
|
|
|
mem--;
|
|
|
|
|
|
|
|
|
|
// No support for imported fragments yet.
|
|
|
|
|
if (mem->second.region == nullptr) return HSA_STATUS_ERROR_INVALID_ALLOCATION;
|
|
|
|
|
|
|
|
|
|
if ((mem->first <= ptr) &&
|
|
|
|
|
(ptr < reinterpret_cast<const uint8_t*>(mem->first) + mem->second.size)) {
|
|
|
|
|
auto& notifiers = mem->second.notifiers;
|
|
|
|
|
if (!notifiers) notifiers.reset(new std::vector<AllocationRegion::notifier_t>);
|
|
|
|
|
AllocationRegion::notifier_t notifier = {
|
|
|
|
|
ptr, AMD::callback_t<hsa_amd_deallocation_callback_t>(callback), user_data};
|
|
|
|
|
notifiers->push_back(notifier);
|
|
|
|
|
return HSA_STATUS_SUCCESS;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return HSA_STATUS_ERROR_INVALID_ALLOCATION;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
hsa_status_t Runtime::DeregisterReleaseNotifier(void* ptr,
|
|
|
|
|
hsa_amd_deallocation_callback_t callback) {
|
|
|
|
|
hsa_status_t ret = HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
|
|
|
|
ScopedAcquire<KernelMutex> lock(&memory_lock_);
|
|
|
|
|
auto mem = allocation_map_.upper_bound(ptr);
|
|
|
|
|
if (mem != allocation_map_.begin()) {
|
|
|
|
|
mem--;
|
|
|
|
|
if ((mem->first <= ptr) &&
|
|
|
|
|
(ptr < reinterpret_cast<const uint8_t*>(mem->first) + mem->second.size)) {
|
|
|
|
|
auto& notifiers = mem->second.notifiers;
|
|
|
|
|
if (!notifiers) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
|
|
|
|
for (size_t i = 0; i < notifiers->size(); i++) {
|
|
|
|
|
if (((*notifiers)[i].ptr == ptr) && ((*notifiers)[i].callback) == callback) {
|
|
|
|
|
(*notifiers)[i] = std::move((*notifiers)[notifiers->size() - 1]);
|
|
|
|
|
notifiers->pop_back();
|
|
|
|
|
i--;
|
|
|
|
|
ret = HSA_STATUS_SUCCESS;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-22 18:10:13 -05:00
|
|
|
hsa_status_t Runtime::CopyMemory(void* dst, const void* src, size_t size) {
|
2018-10-05 01:28:09 -05:00
|
|
|
// Choose agents from pointer info
|
2016-03-22 18:10:13 -05:00
|
|
|
bool is_src_system = false;
|
|
|
|
|
bool is_dst_system = false;
|
2018-10-05 01:28:09 -05:00
|
|
|
core::Agent* src_agent;
|
|
|
|
|
core::Agent* dst_agent;
|
|
|
|
|
|
|
|
|
|
// Fetch ownership
|
2018-10-10 20:13:19 -05:00
|
|
|
const auto& is_system_mem = [&](void* ptr, core::Agent*& agent) {
|
|
|
|
|
hsa_amd_pointer_info_t info;
|
|
|
|
|
info.size = sizeof(info);
|
|
|
|
|
hsa_status_t err = PtrInfo(ptr, &info, nullptr, nullptr, nullptr);
|
|
|
|
|
if (err != HSA_STATUS_SUCCESS)
|
|
|
|
|
throw AMD::hsa_exception(err, "PtrInfo failed in hsa_memory_copy.");
|
|
|
|
|
ptrdiff_t endPtr = (ptrdiff_t)ptr + size;
|
|
|
|
|
if (info.agentBaseAddress <= ptr &&
|
|
|
|
|
endPtr <= (ptrdiff_t)info.agentBaseAddress + info.sizeInBytes) {
|
|
|
|
|
agent = core::Agent::Convert(info.agentOwner);
|
|
|
|
|
return agent->device_type() != core::Agent::DeviceType::kAmdGpuDevice;
|
|
|
|
|
} else {
|
|
|
|
|
agent = cpu_agents_[0];
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
};
|
2016-03-22 18:10:13 -05:00
|
|
|
|
2018-10-10 20:13:19 -05:00
|
|
|
is_src_system = is_system_mem(const_cast<void*>(src), src_agent);
|
|
|
|
|
is_dst_system = is_system_mem(dst, dst_agent);
|
2016-03-22 18:10:13 -05:00
|
|
|
|
2018-10-05 01:28:09 -05:00
|
|
|
// CPU-CPU
|
2016-03-22 18:10:13 -05:00
|
|
|
if (is_src_system && is_dst_system) {
|
2018-10-05 01:28:09 -05:00
|
|
|
memcpy(dst, src, size);
|
2016-03-22 18:10:13 -05:00
|
|
|
return HSA_STATUS_SUCCESS;
|
|
|
|
|
}
|
|
|
|
|
|
2018-10-05 01:28:09 -05:00
|
|
|
// Same GPU
|
|
|
|
|
if (src_agent->node_id() == dst_agent->node_id()) return dst_agent->DmaCopy(dst, src, size);
|
2016-03-22 18:10:13 -05:00
|
|
|
|
2018-10-05 01:28:09 -05:00
|
|
|
// GPU-CPU
|
|
|
|
|
// Must ensure that system memory is visible to the GPU during the copy.
|
2016-03-22 18:10:13 -05:00
|
|
|
const amd::MemoryRegion* system_region =
|
2018-10-05 01:28:09 -05:00
|
|
|
static_cast<const amd::MemoryRegion*>(system_regions_fine_[0]);
|
2016-03-22 18:10:13 -05:00
|
|
|
|
2018-10-10 20:13:19 -05:00
|
|
|
const auto& locked_copy = [&](void* ptr, core::Agent* locking_agent, bool locking_src) {
|
2018-10-05 01:28:09 -05:00
|
|
|
void* gpuPtr;
|
2018-10-10 20:13:19 -05:00
|
|
|
hsa_agent_t agent = locking_agent->public_handle();
|
|
|
|
|
hsa_status_t err = system_region->Lock(1, &agent, ptr, size, &gpuPtr);
|
2018-10-05 01:28:09 -05:00
|
|
|
if (err != HSA_STATUS_SUCCESS) return err;
|
2018-10-10 20:13:19 -05:00
|
|
|
MAKE_SCOPE_GUARD([&]() { system_region->Unlock(ptr); });
|
|
|
|
|
if (locking_src)
|
|
|
|
|
return locking_agent->DmaCopy(dst, gpuPtr, size);
|
|
|
|
|
else
|
|
|
|
|
return locking_agent->DmaCopy(gpuPtr, src, size);
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
if (is_src_system) return locked_copy(const_cast<void*>(src), dst_agent, true);
|
|
|
|
|
if (is_dst_system) return locked_copy(dst, src_agent, false);
|
2016-03-22 18:10:13 -05:00
|
|
|
|
2018-10-05 01:28:09 -05:00
|
|
|
/*
|
|
|
|
|
GPU-GPU - functional support, not a performance path.
|
|
|
|
|
|
|
|
|
|
This goes through system memory because we have to support copying between non-peer GPUs
|
|
|
|
|
and we can't use P2P pointers even if the GPUs are peers. Because hsa_amd_agents_allow_access
|
|
|
|
|
requires the caller to specify all allowed agents we can't assume that a peer mapped pointer
|
|
|
|
|
would remain mapped for the duration of the copy.
|
|
|
|
|
*/
|
|
|
|
|
void* temp = nullptr;
|
|
|
|
|
system_region->Allocate(size, core::MemoryRegion::AllocateNoFlags, &temp);
|
|
|
|
|
MAKE_SCOPE_GUARD([&]() { system_region->Free(temp, size); });
|
2018-10-10 20:13:19 -05:00
|
|
|
hsa_status_t err = src_agent->DmaCopy(temp, src, size);
|
2018-10-05 01:28:09 -05:00
|
|
|
if (err == HSA_STATUS_SUCCESS) err = dst_agent->DmaCopy(dst, temp, size);
|
|
|
|
|
return err;
|
2016-03-22 18:10:13 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
hsa_status_t Runtime::CopyMemory(void* dst, core::Agent& dst_agent,
|
|
|
|
|
const void* src, core::Agent& src_agent,
|
|
|
|
|
size_t size,
|
|
|
|
|
std::vector<core::Signal*>& dep_signals,
|
|
|
|
|
core::Signal& completion_signal) {
|
|
|
|
|
const bool dst_gpu =
|
|
|
|
|
(dst_agent.device_type() == core::Agent::DeviceType::kAmdGpuDevice);
|
|
|
|
|
const bool src_gpu =
|
|
|
|
|
(src_agent.device_type() == core::Agent::DeviceType::kAmdGpuDevice);
|
|
|
|
|
if (dst_gpu || src_gpu) {
|
2018-08-08 20:15:22 -05:00
|
|
|
core::Agent* copy_agent = (src_gpu) ? &src_agent : &dst_agent;
|
|
|
|
|
if (flag_.rev_copy_dir() && dst_gpu && src_gpu)
|
|
|
|
|
copy_agent = (copy_agent == &src_agent) ? &dst_agent : &src_agent;
|
|
|
|
|
return copy_agent->DmaCopy(dst, dst_agent, src, src_agent, size, dep_signals,
|
|
|
|
|
completion_signal);
|
2016-03-22 18:10:13 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// For cpu to cpu, fire and forget a copy thread.
|
2016-05-17 15:12:41 -05:00
|
|
|
const bool profiling_enabled =
|
|
|
|
|
(dst_agent.profiling_enabled() || src_agent.profiling_enabled());
|
|
|
|
|
std::thread(
|
|
|
|
|
[](void* dst, const void* src, size_t size,
|
|
|
|
|
std::vector<core::Signal*> dep_signals,
|
|
|
|
|
core::Signal* completion_signal, bool profiling_enabled) {
|
2016-05-31 13:38:45 -05:00
|
|
|
|
2016-05-17 15:12:41 -05:00
|
|
|
for (core::Signal* dep : dep_signals) {
|
|
|
|
|
dep->WaitRelaxed(HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX,
|
|
|
|
|
HSA_WAIT_STATE_BLOCKED);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (profiling_enabled) {
|
2019-04-18 18:27:27 -05:00
|
|
|
core::Runtime::runtime_singleton_->GetSystemInfo(HSA_SYSTEM_INFO_TIMESTAMP,
|
|
|
|
|
&completion_signal->signal_.start_ts);
|
2016-05-17 15:12:41 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
memcpy(dst, src, size);
|
|
|
|
|
|
|
|
|
|
if (profiling_enabled) {
|
2019-04-18 18:27:27 -05:00
|
|
|
core::Runtime::runtime_singleton_->GetSystemInfo(HSA_SYSTEM_INFO_TIMESTAMP,
|
|
|
|
|
&completion_signal->signal_.end_ts);
|
2016-05-17 15:12:41 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
completion_signal->SubRelease(1);
|
|
|
|
|
},
|
|
|
|
|
dst, src, size, dep_signals, &completion_signal,
|
|
|
|
|
profiling_enabled).detach();
|
2016-03-22 18:10:13 -05:00
|
|
|
|
|
|
|
|
return HSA_STATUS_SUCCESS;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
hsa_status_t Runtime::FillMemory(void* ptr, uint32_t value, size_t count) {
|
2017-05-15 19:59:31 -05:00
|
|
|
// Choose blit agent from pointer info
|
|
|
|
|
hsa_amd_pointer_info_t info;
|
|
|
|
|
uint32_t agent_count;
|
|
|
|
|
hsa_agent_t* accessible = nullptr;
|
|
|
|
|
info.size = sizeof(info);
|
|
|
|
|
MAKE_SCOPE_GUARD([&]() { free(accessible); });
|
|
|
|
|
hsa_status_t err = PtrInfo(ptr, &info, malloc, &agent_count, &accessible);
|
|
|
|
|
if (err != HSA_STATUS_SUCCESS) return err;
|
|
|
|
|
|
|
|
|
|
ptrdiff_t endPtr = (ptrdiff_t)ptr + count * sizeof(uint32_t);
|
|
|
|
|
|
|
|
|
|
// Check for GPU fill
|
|
|
|
|
// Selects GPU fill for SVM and Locked allocations if a GPU address is given and is mapped.
|
|
|
|
|
if (info.agentBaseAddress <= ptr &&
|
|
|
|
|
endPtr <= (ptrdiff_t)info.agentBaseAddress + info.sizeInBytes) {
|
|
|
|
|
core::Agent* blit_agent = core::Agent::Convert(info.agentOwner);
|
|
|
|
|
if (blit_agent->device_type() != core::Agent::DeviceType::kAmdGpuDevice) {
|
|
|
|
|
blit_agent = nullptr;
|
2017-11-22 19:11:09 -06:00
|
|
|
for (uint32_t i = 0; i < agent_count; i++) {
|
2017-05-15 19:59:31 -05:00
|
|
|
if (core::Agent::Convert(accessible[i])->device_type() ==
|
|
|
|
|
core::Agent::DeviceType::kAmdGpuDevice) {
|
|
|
|
|
blit_agent = core::Agent::Convert(accessible[i]);
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (blit_agent) return blit_agent->DmaFill(ptr, value, count);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Host and unmapped SVM addresses copy via host.
|
|
|
|
|
if (info.hostBaseAddress <= ptr && endPtr <= (ptrdiff_t)info.hostBaseAddress + info.sizeInBytes) {
|
|
|
|
|
memset(ptr, value, count * sizeof(uint32_t));
|
|
|
|
|
return HSA_STATUS_SUCCESS;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return HSA_STATUS_ERROR_INVALID_ALLOCATION;
|
2016-03-22 18:10:13 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
hsa_status_t Runtime::AllowAccess(uint32_t num_agents,
|
|
|
|
|
const hsa_agent_t* agents, const void* ptr) {
|
|
|
|
|
const amd::MemoryRegion* amd_region = NULL;
|
|
|
|
|
size_t alloc_size = 0;
|
|
|
|
|
|
|
|
|
|
{
|
|
|
|
|
ScopedAcquire<KernelMutex> lock(&memory_lock_);
|
|
|
|
|
|
2016-11-09 22:21:24 -06:00
|
|
|
std::map<const void*, AllocationRegion>::const_iterator it = allocation_map_.find(ptr);
|
2016-03-22 18:10:13 -05:00
|
|
|
|
|
|
|
|
if (it == allocation_map_.end()) {
|
|
|
|
|
return HSA_STATUS_ERROR;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
amd_region = reinterpret_cast<const amd::MemoryRegion*>(it->second.region);
|
|
|
|
|
alloc_size = it->second.size;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return amd_region->AllowAccess(num_agents, agents, ptr, alloc_size);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
hsa_status_t Runtime::GetSystemInfo(hsa_system_info_t attribute, void* value) {
|
|
|
|
|
switch (attribute) {
|
|
|
|
|
case HSA_SYSTEM_INFO_VERSION_MAJOR:
|
|
|
|
|
*((uint16_t*)value) = HSA_VERSION_MAJOR;
|
|
|
|
|
break;
|
|
|
|
|
case HSA_SYSTEM_INFO_VERSION_MINOR:
|
|
|
|
|
*((uint16_t*)value) = HSA_VERSION_MINOR;
|
|
|
|
|
break;
|
|
|
|
|
case HSA_SYSTEM_INFO_TIMESTAMP: {
|
|
|
|
|
HsaClockCounters clocks;
|
|
|
|
|
hsaKmtGetClockCounters(0, &clocks);
|
2019-05-20 12:41:48 -05:00
|
|
|
*((uint64_t*)value) = clocks.SystemClockCounter;
|
2016-03-22 18:10:13 -05:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
case HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY: {
|
|
|
|
|
assert(sys_clock_freq_ != 0 &&
|
|
|
|
|
"Use of HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY before HSA "
|
|
|
|
|
"initialization completes.");
|
|
|
|
|
*(uint64_t*)value = sys_clock_freq_;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
case HSA_SYSTEM_INFO_SIGNAL_MAX_WAIT:
|
|
|
|
|
*((uint64_t*)value) = 0xFFFFFFFFFFFFFFFF;
|
|
|
|
|
break;
|
|
|
|
|
case HSA_SYSTEM_INFO_ENDIANNESS:
|
|
|
|
|
#if defined(HSA_LITTLE_ENDIAN)
|
|
|
|
|
*((hsa_endianness_t*)value) = HSA_ENDIANNESS_LITTLE;
|
|
|
|
|
#else
|
|
|
|
|
*((hsa_endianness_t*)value) = HSA_ENDIANNESS_BIG;
|
|
|
|
|
#endif
|
|
|
|
|
break;
|
|
|
|
|
case HSA_SYSTEM_INFO_MACHINE_MODEL:
|
|
|
|
|
#if defined(HSA_LARGE_MODEL)
|
|
|
|
|
*((hsa_machine_model_t*)value) = HSA_MACHINE_MODEL_LARGE;
|
|
|
|
|
#else
|
|
|
|
|
*((hsa_machine_model_t*)value) = HSA_MACHINE_MODEL_SMALL;
|
|
|
|
|
#endif
|
|
|
|
|
break;
|
2016-08-23 05:30:22 -05:00
|
|
|
case HSA_SYSTEM_INFO_EXTENSIONS: {
|
|
|
|
|
memset(value, 0, sizeof(uint8_t) * 128);
|
|
|
|
|
|
|
|
|
|
auto setFlag = [&](uint32_t bit) {
|
|
|
|
|
assert(bit < 128 * 8 && "Extension value exceeds extension bitmask");
|
|
|
|
|
uint index = bit / 8;
|
|
|
|
|
uint subBit = bit % 8;
|
|
|
|
|
((uint8_t*)value)[index] |= 1 << subBit;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
if (hsa_internal_api_table_.finalizer_api.hsa_ext_program_finalize_fn != NULL) {
|
|
|
|
|
setFlag(HSA_EXTENSION_FINALIZER);
|
|
|
|
|
}
|
2016-03-22 18:10:13 -05:00
|
|
|
|
2016-08-23 05:30:22 -05:00
|
|
|
if (hsa_internal_api_table_.image_api.hsa_ext_image_create_fn != NULL) {
|
|
|
|
|
setFlag(HSA_EXTENSION_IMAGES);
|
|
|
|
|
}
|
2016-03-22 18:10:13 -05:00
|
|
|
|
2017-11-10 16:37:41 -06:00
|
|
|
if (os::LibHandle lib = os::LoadLib(kAqlProfileLib)) {
|
|
|
|
|
os::CloseLib(lib);
|
2017-07-18 16:07:58 -05:00
|
|
|
setFlag(HSA_EXTENSION_AMD_AQLPROFILE);
|
|
|
|
|
}
|
|
|
|
|
|
2016-08-23 05:30:22 -05:00
|
|
|
setFlag(HSA_EXTENSION_AMD_PROFILER);
|
2016-08-22 01:40:26 -04:00
|
|
|
|
2016-08-23 05:30:22 -05:00
|
|
|
break;
|
|
|
|
|
}
|
2018-08-20 20:41:35 -05:00
|
|
|
case HSA_AMD_SYSTEM_INFO_BUILD_VERSION: {
|
|
|
|
|
*(const char**)value = STRING(ROCR_BUILD_ID);
|
|
|
|
|
break;
|
|
|
|
|
}
|
2016-03-22 18:10:13 -05:00
|
|
|
default:
|
|
|
|
|
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
|
|
|
|
}
|
|
|
|
|
return HSA_STATUS_SUCCESS;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
hsa_status_t Runtime::SetAsyncSignalHandler(hsa_signal_t signal,
|
|
|
|
|
hsa_signal_condition_t cond,
|
|
|
|
|
hsa_signal_value_t value,
|
|
|
|
|
hsa_amd_signal_handler handler,
|
|
|
|
|
void* arg) {
|
|
|
|
|
// Indicate that this signal is in use.
|
|
|
|
|
if (signal.handle != 0) hsa_signal_handle(signal)->Retain();
|
|
|
|
|
|
|
|
|
|
ScopedAcquire<KernelMutex> scope_lock(&async_events_control_.lock);
|
|
|
|
|
|
|
|
|
|
// Lazy initializer
|
|
|
|
|
if (async_events_control_.async_events_thread_ == NULL) {
|
|
|
|
|
// Create monitoring thread control signal
|
|
|
|
|
auto err = HSA::hsa_signal_create(0, 0, NULL, &async_events_control_.wake);
|
|
|
|
|
if (err != HSA_STATUS_SUCCESS) {
|
|
|
|
|
assert(false && "Asyncronous events control signal creation error.");
|
|
|
|
|
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
|
|
|
|
}
|
|
|
|
|
async_events_.PushBack(async_events_control_.wake, HSA_SIGNAL_CONDITION_NE,
|
|
|
|
|
0, NULL, NULL);
|
|
|
|
|
|
|
|
|
|
// Start event monitoring thread
|
|
|
|
|
async_events_control_.exit = false;
|
|
|
|
|
async_events_control_.async_events_thread_ =
|
|
|
|
|
os::CreateThread(AsyncEventsLoop, NULL);
|
|
|
|
|
if (async_events_control_.async_events_thread_ == NULL) {
|
|
|
|
|
assert(false && "Asyncronous events thread creation error.");
|
|
|
|
|
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
new_async_events_.PushBack(signal, cond, value, handler, arg);
|
|
|
|
|
|
|
|
|
|
hsa_signal_handle(async_events_control_.wake)->StoreRelease(1);
|
|
|
|
|
|
|
|
|
|
return HSA_STATUS_SUCCESS;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
hsa_status_t Runtime::InteropMap(uint32_t num_agents, Agent** agents,
|
|
|
|
|
int interop_handle, uint32_t flags,
|
|
|
|
|
size_t* size, void** ptr,
|
|
|
|
|
size_t* metadata_size, const void** metadata) {
|
2016-11-10 02:58:59 -06:00
|
|
|
static const int tinyArraySize=8;
|
2016-03-22 18:10:13 -05:00
|
|
|
HsaGraphicsResourceInfo info;
|
|
|
|
|
|
2016-11-10 02:58:59 -06:00
|
|
|
HSAuint32 short_nodes[tinyArraySize];
|
2016-03-22 18:10:13 -05:00
|
|
|
HSAuint32* nodes = short_nodes;
|
2016-11-10 02:58:59 -06:00
|
|
|
if (num_agents > tinyArraySize) {
|
2016-03-22 18:10:13 -05:00
|
|
|
nodes = new HSAuint32[num_agents];
|
|
|
|
|
if (nodes == NULL) return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
|
|
|
|
}
|
|
|
|
|
MAKE_SCOPE_GUARD([&]() {
|
2016-11-10 02:58:59 -06:00
|
|
|
if (num_agents > tinyArraySize) delete[] nodes;
|
2016-03-22 18:10:13 -05:00
|
|
|
});
|
|
|
|
|
|
2017-11-22 19:11:09 -06:00
|
|
|
for (uint32_t i = 0; i < num_agents; i++)
|
2016-03-22 18:10:13 -05:00
|
|
|
agents[i]->GetInfo((hsa_agent_info_t)HSA_AMD_AGENT_INFO_DRIVER_NODE_ID,
|
|
|
|
|
&nodes[i]);
|
|
|
|
|
|
|
|
|
|
if (hsaKmtRegisterGraphicsHandleToNodes(interop_handle, &info, num_agents,
|
|
|
|
|
nodes) != HSAKMT_STATUS_SUCCESS)
|
|
|
|
|
return HSA_STATUS_ERROR;
|
|
|
|
|
|
|
|
|
|
HSAuint64 altAddress;
|
|
|
|
|
HsaMemMapFlags map_flags;
|
|
|
|
|
map_flags.Value = 0;
|
|
|
|
|
map_flags.ui32.PageSize = HSA_PAGE_SIZE_64KB;
|
|
|
|
|
if (hsaKmtMapMemoryToGPUNodes(info.MemoryAddress, info.SizeInBytes,
|
|
|
|
|
&altAddress, map_flags, num_agents,
|
|
|
|
|
nodes) != HSAKMT_STATUS_SUCCESS) {
|
|
|
|
|
map_flags.ui32.PageSize = HSA_PAGE_SIZE_4KB;
|
2016-11-10 02:51:19 -06:00
|
|
|
if (hsaKmtMapMemoryToGPUNodes(info.MemoryAddress, info.SizeInBytes, &altAddress, map_flags,
|
|
|
|
|
num_agents, nodes) != HSAKMT_STATUS_SUCCESS) {
|
|
|
|
|
hsaKmtDeregisterMemory(info.MemoryAddress);
|
2016-03-22 18:10:13 -05:00
|
|
|
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
2016-11-10 02:51:19 -06:00
|
|
|
}
|
2016-03-22 18:10:13 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (metadata_size != NULL) *metadata_size = info.MetadataSizeInBytes;
|
|
|
|
|
if (metadata != NULL) *metadata = info.Metadata;
|
|
|
|
|
|
|
|
|
|
*size = info.SizeInBytes;
|
|
|
|
|
*ptr = info.MemoryAddress;
|
|
|
|
|
|
|
|
|
|
return HSA_STATUS_SUCCESS;
|
|
|
|
|
}
|
|
|
|
|
|
2016-11-09 22:21:24 -06:00
|
|
|
hsa_status_t Runtime::InteropUnmap(void* ptr) {
|
2016-03-22 18:10:13 -05:00
|
|
|
if(hsaKmtUnmapMemoryToGPU(ptr)!=HSAKMT_STATUS_SUCCESS)
|
|
|
|
|
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
|
|
|
|
if(hsaKmtDeregisterMemory(ptr)!=HSAKMT_STATUS_SUCCESS)
|
|
|
|
|
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
|
|
|
|
return HSA_STATUS_SUCCESS;
|
|
|
|
|
}
|
|
|
|
|
|
2016-11-09 22:21:24 -06:00
|
|
|
hsa_status_t Runtime::PtrInfo(void* ptr, hsa_amd_pointer_info_t* info, void* (*alloc)(size_t),
|
2017-09-11 20:31:20 -05:00
|
|
|
uint32_t* num_agents_accessible, hsa_agent_t** accessible,
|
|
|
|
|
PtrInfoBlockData* block_info) {
|
|
|
|
|
static_assert(static_cast<int>(HSA_POINTER_UNKNOWN) == static_cast<int>(HSA_EXT_POINTER_TYPE_UNKNOWN),
|
|
|
|
|
"Thunk pointer info mismatch");
|
|
|
|
|
static_assert(static_cast<int>(HSA_POINTER_ALLOCATED) == static_cast<int>(HSA_EXT_POINTER_TYPE_HSA),
|
|
|
|
|
"Thunk pointer info mismatch");
|
|
|
|
|
static_assert(static_cast<int>(HSA_POINTER_REGISTERED_USER) == static_cast<int>(HSA_EXT_POINTER_TYPE_LOCKED),
|
|
|
|
|
"Thunk pointer info mismatch");
|
|
|
|
|
static_assert(static_cast<int>(HSA_POINTER_REGISTERED_GRAPHICS) == static_cast<int>(HSA_EXT_POINTER_TYPE_GRAPHICS),
|
|
|
|
|
"Thunk pointer info mismatch");
|
|
|
|
|
|
2016-11-09 22:21:24 -06:00
|
|
|
HsaPointerInfo thunkInfo;
|
|
|
|
|
uint32_t* mappedNodes;
|
|
|
|
|
|
2017-05-15 19:59:31 -05:00
|
|
|
hsa_amd_pointer_info_t retInfo;
|
|
|
|
|
|
2017-07-12 20:35:20 -05:00
|
|
|
// check output struct has an initialized size.
|
|
|
|
|
if (info->size == 0) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
2016-11-09 22:21:24 -06:00
|
|
|
|
|
|
|
|
bool returnListData =
|
|
|
|
|
((alloc != nullptr) && (num_agents_accessible != nullptr) && (accessible != nullptr));
|
2017-09-11 20:31:20 -05:00
|
|
|
|
|
|
|
|
{ // memory_lock protects access to the NMappedNodes array and fragment user data since these may
|
|
|
|
|
// change with calls to memory APIs.
|
2016-11-09 22:21:24 -06:00
|
|
|
ScopedAcquire<KernelMutex> lock(&memory_lock_);
|
|
|
|
|
hsaKmtQueryPointerInfo(ptr, &thunkInfo);
|
2017-09-11 20:31:20 -05:00
|
|
|
if (returnListData) {
|
|
|
|
|
assert(thunkInfo.NMappedNodes <= agents_by_node_.size() &&
|
|
|
|
|
"PointerInfo: Thunk returned more than all agents in NMappedNodes.");
|
|
|
|
|
mappedNodes = (uint32_t*)alloca(thunkInfo.NMappedNodes * sizeof(uint32_t));
|
|
|
|
|
memcpy(mappedNodes, thunkInfo.MappedNodes, thunkInfo.NMappedNodes * sizeof(uint32_t));
|
|
|
|
|
}
|
|
|
|
|
retInfo.type = (hsa_amd_pointer_type_t)thunkInfo.Type;
|
|
|
|
|
retInfo.agentBaseAddress = reinterpret_cast<void*>(thunkInfo.GPUAddress);
|
|
|
|
|
retInfo.hostBaseAddress = thunkInfo.CPUAddress;
|
|
|
|
|
retInfo.sizeInBytes = thunkInfo.SizeInBytes;
|
|
|
|
|
retInfo.userData = thunkInfo.UserData;
|
|
|
|
|
if (block_info != nullptr) {
|
2017-09-20 15:22:34 -05:00
|
|
|
// The only time host and agent ptr may be different is when the memory is lock memory (malloc
|
|
|
|
|
// memory pinned for GPU access). In this case there can not be any suballocation so
|
|
|
|
|
// block_info is redundant and unused. Host address is returned since host address is used to
|
|
|
|
|
// manipulate lock memory. This protects future use of block_info with lock memory.
|
2017-09-11 20:31:20 -05:00
|
|
|
block_info->base = retInfo.hostBaseAddress;
|
|
|
|
|
block_info->length = retInfo.sizeInBytes;
|
|
|
|
|
}
|
|
|
|
|
if (retInfo.type == HSA_EXT_POINTER_TYPE_HSA) {
|
|
|
|
|
auto fragment = allocation_map_.upper_bound(ptr);
|
|
|
|
|
if (fragment != allocation_map_.begin()) {
|
|
|
|
|
fragment--;
|
|
|
|
|
if ((fragment->first <= ptr) &&
|
|
|
|
|
(ptr < reinterpret_cast<const uint8_t*>(fragment->first) + fragment->second.size)) {
|
2017-09-20 15:22:34 -05:00
|
|
|
// agent and host address must match here. Only lock memory is allowed to have differing
|
|
|
|
|
// addresses but lock memory has type HSA_EXT_POINTER_TYPE_LOCKED and cannot be
|
|
|
|
|
// suballocated.
|
|
|
|
|
retInfo.agentBaseAddress = const_cast<void*>(fragment->first);
|
|
|
|
|
retInfo.hostBaseAddress = retInfo.agentBaseAddress;
|
2017-09-11 20:31:20 -05:00
|
|
|
retInfo.sizeInBytes = fragment->second.size;
|
|
|
|
|
retInfo.userData = fragment->second.user_ptr;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
} // end lock scope
|
2016-11-09 22:21:24 -06:00
|
|
|
|
2017-05-15 19:59:31 -05:00
|
|
|
retInfo.size = Min(info->size, sizeof(hsa_amd_pointer_info_t));
|
2017-09-11 20:31:20 -05:00
|
|
|
|
|
|
|
|
// Temp: workaround thunk bug, IPC memory has garbage in Node.
|
|
|
|
|
// retInfo.agentOwner = agents_by_node_[thunkInfo.Node][0]->public_handle();
|
|
|
|
|
auto it = agents_by_node_.find(thunkInfo.Node);
|
|
|
|
|
if (it != agents_by_node_.end())
|
|
|
|
|
retInfo.agentOwner = agents_by_node_[thunkInfo.Node][0]->public_handle();
|
|
|
|
|
else
|
|
|
|
|
retInfo.agentOwner.handle = 0;
|
2017-05-15 19:59:31 -05:00
|
|
|
|
|
|
|
|
memcpy(info, &retInfo, retInfo.size);
|
2016-11-09 22:21:24 -06:00
|
|
|
|
|
|
|
|
if (returnListData) {
|
|
|
|
|
uint32_t count = 0;
|
2017-11-22 19:11:09 -06:00
|
|
|
for (HSAuint32 i = 0; i < thunkInfo.NMappedNodes; i++) {
|
2016-11-09 22:21:24 -06:00
|
|
|
assert(mappedNodes[i] < agents_by_node_.size() &&
|
|
|
|
|
"PointerInfo: Invalid node ID returned from thunk.");
|
|
|
|
|
count += agents_by_node_[mappedNodes[i]].size();
|
|
|
|
|
}
|
|
|
|
|
|
2017-10-21 10:35:27 -05:00
|
|
|
AMD::callback_t<decltype(alloc)> Alloc(alloc);
|
|
|
|
|
*accessible = (hsa_agent_t*)Alloc(sizeof(hsa_agent_t) * count);
|
2016-11-09 22:21:24 -06:00
|
|
|
if ((*accessible) == nullptr) return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
|
|
|
|
*num_agents_accessible = count;
|
|
|
|
|
|
|
|
|
|
uint32_t index = 0;
|
2017-11-22 19:11:09 -06:00
|
|
|
for (HSAuint32 i = 0; i < thunkInfo.NMappedNodes; i++) {
|
2016-11-09 22:21:24 -06:00
|
|
|
auto& list = agents_by_node_[mappedNodes[i]];
|
2017-11-22 19:11:09 -06:00
|
|
|
for (auto agent : list) {
|
|
|
|
|
(*accessible)[index] = agent->public_handle();
|
2016-11-09 22:21:24 -06:00
|
|
|
index++;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return HSA_STATUS_SUCCESS;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
hsa_status_t Runtime::SetPtrInfoData(void* ptr, void* userptr) {
|
2017-09-11 20:31:20 -05:00
|
|
|
{ // Use allocation map if possible to handle fragments.
|
|
|
|
|
ScopedAcquire<KernelMutex> lock(&memory_lock_);
|
|
|
|
|
const auto& it = allocation_map_.find(ptr);
|
|
|
|
|
if (it != allocation_map_.end()) {
|
|
|
|
|
it->second.user_ptr = userptr;
|
|
|
|
|
return HSA_STATUS_SUCCESS;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
// Cover entries not in the allocation map (graphics, lock,...)
|
2016-11-09 22:21:24 -06:00
|
|
|
if (hsaKmtSetMemoryUserData(ptr, userptr) == HSAKMT_STATUS_SUCCESS)
|
|
|
|
|
return HSA_STATUS_SUCCESS;
|
2017-09-11 20:31:20 -05:00
|
|
|
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
2016-11-09 22:21:24 -06:00
|
|
|
}
|
|
|
|
|
|
2016-11-10 02:51:19 -06:00
|
|
|
hsa_status_t Runtime::IPCCreate(void* ptr, size_t len, hsa_amd_ipc_memory_t* handle) {
|
2016-12-01 09:31:23 -06:00
|
|
|
static_assert(sizeof(hsa_amd_ipc_memory_t) == sizeof(HsaSharedMemoryHandle),
|
|
|
|
|
"Thunk IPC mismatch.");
|
2017-09-11 20:31:20 -05:00
|
|
|
// Reject sharing allocations larger than ~8TB due to thunk limitations.
|
|
|
|
|
if (len > 0x7FFFFFFF000ull) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
|
|
|
|
|
|
|
|
|
// Check for fragment sharing.
|
|
|
|
|
PtrInfoBlockData block;
|
|
|
|
|
hsa_amd_pointer_info_t info;
|
|
|
|
|
info.size = sizeof(info);
|
|
|
|
|
if (PtrInfo(ptr, &info, nullptr, nullptr, nullptr, &block) != HSA_STATUS_SUCCESS)
|
2016-12-01 09:31:23 -06:00
|
|
|
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
2017-09-11 20:31:20 -05:00
|
|
|
if ((block.base != ptr) || (block.length != len)) {
|
|
|
|
|
if (!IsMultipleOf(block.base, 2 * 1024 * 1024)) {
|
|
|
|
|
assert(false && "Fragment's block not aligned to 2MB!");
|
|
|
|
|
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
|
|
|
|
}
|
|
|
|
|
if (hsaKmtShareMemory(block.base, block.length, reinterpret_cast<HsaSharedMemoryHandle*>(
|
|
|
|
|
handle)) != HSAKMT_STATUS_SUCCESS)
|
|
|
|
|
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
|
|
|
|
uint32_t offset =
|
|
|
|
|
(reinterpret_cast<uint8_t*>(ptr) - reinterpret_cast<uint8_t*>(block.base)) / 4096;
|
|
|
|
|
// Holds size in (4K?) pages in thunk handle: Mark as a fragment and denote offset.
|
|
|
|
|
handle->handle[6] |= 0x80000000 | offset;
|
|
|
|
|
} else {
|
|
|
|
|
if (hsaKmtShareMemory(ptr, len, reinterpret_cast<HsaSharedMemoryHandle*>(handle)) !=
|
|
|
|
|
HSAKMT_STATUS_SUCCESS)
|
|
|
|
|
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
|
|
|
|
}
|
|
|
|
|
return HSA_STATUS_SUCCESS;
|
2016-11-10 02:51:19 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
hsa_status_t Runtime::IPCAttach(const hsa_amd_ipc_memory_t* handle, size_t len, uint32_t num_agents,
|
|
|
|
|
Agent** agents, void** mapped_ptr) {
|
2016-12-01 09:31:23 -06:00
|
|
|
static const int tinyArraySize = 8;
|
|
|
|
|
void* importAddress;
|
|
|
|
|
HSAuint64 importSize;
|
|
|
|
|
HSAuint64 altAddress;
|
2017-09-11 20:31:20 -05:00
|
|
|
|
|
|
|
|
hsa_amd_ipc_memory_t importHandle;
|
|
|
|
|
importHandle = *handle;
|
|
|
|
|
|
|
|
|
|
// Extract fragment info
|
|
|
|
|
bool isFragment = false;
|
|
|
|
|
uint32_t fragOffset = 0;
|
|
|
|
|
auto fixFragment = [&]() {
|
|
|
|
|
if (!isFragment) return;
|
|
|
|
|
importAddress = reinterpret_cast<uint8_t*>(importAddress) + fragOffset;
|
|
|
|
|
len = Min(len, importSize - fragOffset);
|
|
|
|
|
ScopedAcquire<KernelMutex> lock(&memory_lock_);
|
|
|
|
|
allocation_map_[importAddress] = AllocationRegion(nullptr, len);
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
if ((importHandle.handle[6] & 0x80000000) != 0) {
|
|
|
|
|
isFragment = true;
|
|
|
|
|
fragOffset = (importHandle.handle[6] & 0x1FF) * 4096;
|
|
|
|
|
importHandle.handle[6] &= ~(0x80000000 | 0x1FF);
|
|
|
|
|
}
|
|
|
|
|
|
2016-12-01 09:31:23 -06:00
|
|
|
if (num_agents == 0) {
|
2017-09-11 20:31:20 -05:00
|
|
|
if (hsaKmtRegisterSharedHandle(reinterpret_cast<const HsaSharedMemoryHandle*>(&importHandle),
|
2016-12-01 09:31:23 -06:00
|
|
|
&importAddress, &importSize) != HSAKMT_STATUS_SUCCESS)
|
|
|
|
|
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
|
|
|
|
if (hsaKmtMapMemoryToGPU(importAddress, importSize, &altAddress) != HSAKMT_STATUS_SUCCESS) {
|
|
|
|
|
hsaKmtDeregisterMemory(importAddress);
|
|
|
|
|
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
|
|
|
|
}
|
2017-09-11 20:31:20 -05:00
|
|
|
fixFragment();
|
2016-12-01 09:31:23 -06:00
|
|
|
*mapped_ptr = importAddress;
|
|
|
|
|
return HSA_STATUS_SUCCESS;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
HSAuint32* nodes = nullptr;
|
|
|
|
|
if (num_agents > tinyArraySize)
|
|
|
|
|
nodes = new HSAuint32[num_agents];
|
|
|
|
|
else
|
|
|
|
|
nodes = (HSAuint32*)alloca(sizeof(HSAuint32) * num_agents);
|
|
|
|
|
if (nodes == NULL) return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
|
|
|
|
|
|
|
|
|
MAKE_SCOPE_GUARD([&]() {
|
|
|
|
|
if (num_agents > tinyArraySize) delete[] nodes;
|
|
|
|
|
});
|
|
|
|
|
|
2017-11-22 19:11:09 -06:00
|
|
|
for (uint32_t i = 0; i < num_agents; i++)
|
2016-12-01 09:31:23 -06:00
|
|
|
agents[i]->GetInfo((hsa_agent_info_t)HSA_AMD_AGENT_INFO_DRIVER_NODE_ID, &nodes[i]);
|
|
|
|
|
|
2017-09-11 20:31:20 -05:00
|
|
|
if (hsaKmtRegisterSharedHandleToNodes(
|
|
|
|
|
reinterpret_cast<const HsaSharedMemoryHandle*>(&importHandle), &importAddress,
|
|
|
|
|
&importSize, num_agents, nodes) != HSAKMT_STATUS_SUCCESS)
|
2016-12-01 09:31:23 -06:00
|
|
|
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
|
|
|
|
|
|
|
|
|
HsaMemMapFlags map_flags;
|
|
|
|
|
map_flags.Value = 0;
|
|
|
|
|
map_flags.ui32.PageSize = HSA_PAGE_SIZE_64KB;
|
|
|
|
|
if (hsaKmtMapMemoryToGPUNodes(importAddress, importSize, &altAddress, map_flags, num_agents,
|
|
|
|
|
nodes) != HSAKMT_STATUS_SUCCESS) {
|
|
|
|
|
map_flags.ui32.PageSize = HSA_PAGE_SIZE_4KB;
|
|
|
|
|
if (hsaKmtMapMemoryToGPUNodes(importAddress, importSize, &altAddress, map_flags, num_agents,
|
|
|
|
|
nodes) != HSAKMT_STATUS_SUCCESS) {
|
|
|
|
|
hsaKmtDeregisterMemory(importAddress);
|
|
|
|
|
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-09-11 20:31:20 -05:00
|
|
|
fixFragment();
|
2016-12-01 09:31:23 -06:00
|
|
|
*mapped_ptr = importAddress;
|
|
|
|
|
return HSA_STATUS_SUCCESS;
|
2016-11-10 02:51:19 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
hsa_status_t Runtime::IPCDetach(void* ptr) {
|
2017-09-11 20:31:20 -05:00
|
|
|
{ // Handle imported fragments.
|
|
|
|
|
ScopedAcquire<KernelMutex> lock(&memory_lock_);
|
|
|
|
|
const auto& it = allocation_map_.find(ptr);
|
|
|
|
|
if (it != allocation_map_.end()) {
|
|
|
|
|
if (it->second.region != nullptr) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
|
|
|
|
allocation_map_.erase(it);
|
|
|
|
|
lock.Release(); // Can't hold memory lock when using pointer info.
|
|
|
|
|
|
|
|
|
|
PtrInfoBlockData block;
|
|
|
|
|
hsa_amd_pointer_info_t info;
|
|
|
|
|
info.size = sizeof(info);
|
|
|
|
|
if (PtrInfo(ptr, &info, nullptr, nullptr, nullptr, &block) != HSA_STATUS_SUCCESS)
|
|
|
|
|
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
|
|
|
|
ptr = block.base;
|
|
|
|
|
}
|
|
|
|
|
}
|
2016-12-01 09:31:23 -06:00
|
|
|
if (hsaKmtUnmapMemoryToGPU(ptr) != HSAKMT_STATUS_SUCCESS)
|
|
|
|
|
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
|
|
|
|
if (hsaKmtDeregisterMemory(ptr) != HSAKMT_STATUS_SUCCESS)
|
|
|
|
|
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
|
|
|
|
return HSA_STATUS_SUCCESS;
|
2016-11-10 02:51:19 -06:00
|
|
|
}
|
|
|
|
|
|
2016-03-22 18:10:13 -05:00
|
|
|
void Runtime::AsyncEventsLoop(void*) {
|
|
|
|
|
auto& async_events_control_ = runtime_singleton_->async_events_control_;
|
|
|
|
|
auto& async_events_ = runtime_singleton_->async_events_;
|
|
|
|
|
auto& new_async_events_ = runtime_singleton_->new_async_events_;
|
|
|
|
|
|
|
|
|
|
while (!async_events_control_.exit) {
|
|
|
|
|
// Wait for a signal
|
|
|
|
|
hsa_signal_value_t value;
|
2016-06-10 17:31:13 -05:00
|
|
|
uint32_t index = AMD::hsa_amd_signal_wait_any(
|
2016-03-22 18:10:13 -05:00
|
|
|
uint32_t(async_events_.Size()), &async_events_.signal_[0],
|
|
|
|
|
&async_events_.cond_[0], &async_events_.value_[0], uint64_t(-1),
|
|
|
|
|
HSA_WAIT_STATE_BLOCKED, &value);
|
|
|
|
|
|
|
|
|
|
// Reset the control signal
|
|
|
|
|
if (index == 0) {
|
|
|
|
|
hsa_signal_handle(async_events_control_.wake)->StoreRelaxed(0);
|
|
|
|
|
} else if (index != -1) {
|
|
|
|
|
// No error or timout occured, process the handler
|
|
|
|
|
assert(async_events_.handler_[index] != NULL);
|
|
|
|
|
bool keep =
|
|
|
|
|
async_events_.handler_[index](value, async_events_.arg_[index]);
|
|
|
|
|
if (!keep) {
|
|
|
|
|
hsa_signal_handle(async_events_.signal_[index])->Release();
|
|
|
|
|
async_events_.CopyIndex(index, async_events_.Size() - 1);
|
|
|
|
|
async_events_.PopBack();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Check for dead signals
|
|
|
|
|
index = 0;
|
|
|
|
|
while (index != async_events_.Size()) {
|
|
|
|
|
if (!hsa_signal_handle(async_events_.signal_[index])->IsValid()) {
|
|
|
|
|
hsa_signal_handle(async_events_.signal_[index])->Release();
|
|
|
|
|
async_events_.CopyIndex(index, async_events_.Size() - 1);
|
|
|
|
|
async_events_.PopBack();
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
index++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Insert new signals and find plain functions
|
|
|
|
|
typedef std::pair<void (*)(void*), void*> func_arg_t;
|
|
|
|
|
std::vector<func_arg_t> functions;
|
|
|
|
|
{
|
|
|
|
|
ScopedAcquire<KernelMutex> scope_lock(&async_events_control_.lock);
|
|
|
|
|
for (size_t i = 0; i < new_async_events_.Size(); i++) {
|
|
|
|
|
if (new_async_events_.signal_[i].handle == 0) {
|
|
|
|
|
functions.push_back(
|
|
|
|
|
func_arg_t((void (*)(void*))new_async_events_.handler_[i],
|
|
|
|
|
new_async_events_.arg_[i]));
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
async_events_.PushBack(
|
|
|
|
|
new_async_events_.signal_[i], new_async_events_.cond_[i],
|
|
|
|
|
new_async_events_.value_[i], new_async_events_.handler_[i],
|
|
|
|
|
new_async_events_.arg_[i]);
|
|
|
|
|
}
|
|
|
|
|
new_async_events_.Clear();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Call plain functions
|
|
|
|
|
for (size_t i = 0; i < functions.size(); i++)
|
|
|
|
|
functions[i].first(functions[i].second);
|
|
|
|
|
functions.clear();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Release wait count of all pending signals
|
|
|
|
|
for (size_t i = 1; i < async_events_.Size(); i++)
|
|
|
|
|
hsa_signal_handle(async_events_.signal_[i])->Release();
|
|
|
|
|
async_events_.Clear();
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < new_async_events_.Size(); i++)
|
|
|
|
|
hsa_signal_handle(new_async_events_.signal_[i])->Release();
|
|
|
|
|
new_async_events_.Clear();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void Runtime::BindVmFaultHandler() {
|
2018-11-09 03:07:10 -06:00
|
|
|
if (core::g_use_interrupt_wait && !gpu_agents_.empty()) {
|
2016-03-22 18:10:13 -05:00
|
|
|
// Create memory event with manual reset to avoid racing condition
|
|
|
|
|
// with driver in case of multiple concurrent VM faults.
|
|
|
|
|
vm_fault_event_ =
|
|
|
|
|
core::InterruptSignal::CreateEvent(HSA_EVENTTYPE_MEMORY, true);
|
|
|
|
|
|
|
|
|
|
// Create an interrupt signal object to contain the memory event.
|
|
|
|
|
// This signal object will be registered with the async handler global
|
|
|
|
|
// thread.
|
|
|
|
|
vm_fault_signal_ = new core::InterruptSignal(0, vm_fault_event_);
|
|
|
|
|
|
|
|
|
|
if (!vm_fault_signal_->IsValid() || vm_fault_signal_->EopEvent() == NULL) {
|
|
|
|
|
assert(false && "Failed on creating VM fault signal");
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2017-06-14 14:14:38 -04:00
|
|
|
SetAsyncSignalHandler(core::Signal::Convert(vm_fault_signal_),
|
|
|
|
|
HSA_SIGNAL_CONDITION_NE, 0, VMFaultHandler,
|
|
|
|
|
reinterpret_cast<void*>(vm_fault_signal_));
|
2016-03-22 18:10:13 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool Runtime::VMFaultHandler(hsa_signal_value_t val, void* arg) {
|
2017-06-14 14:14:38 -04:00
|
|
|
core::InterruptSignal* vm_fault_signal =
|
|
|
|
|
reinterpret_cast<core::InterruptSignal*>(arg);
|
2016-03-22 18:10:13 -05:00
|
|
|
|
2017-06-14 14:14:38 -04:00
|
|
|
assert(vm_fault_signal != NULL);
|
2016-03-22 18:10:13 -05:00
|
|
|
|
2017-06-14 14:14:38 -04:00
|
|
|
if (vm_fault_signal == NULL) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
2017-09-29 12:45:24 -04:00
|
|
|
HsaEvent* vm_fault_event = vm_fault_signal->EopEvent();
|
|
|
|
|
|
|
|
|
|
HsaMemoryAccessFault& fault =
|
|
|
|
|
vm_fault_event->EventData.EventData.MemoryAccessFault;
|
|
|
|
|
|
|
|
|
|
hsa_status_t custom_handler_status = HSA_STATUS_ERROR;
|
2019-02-19 22:06:27 -06:00
|
|
|
auto system_event_handlers = runtime_singleton_->GetSystemEventHandlers();
|
2017-09-29 12:45:24 -04:00
|
|
|
// If custom handler is registered, pack the fault info and call the handler
|
2019-02-19 22:06:27 -06:00
|
|
|
if (!system_event_handlers.empty()) {
|
|
|
|
|
hsa_amd_event_t memory_fault_event;
|
|
|
|
|
memory_fault_event.event_type = HSA_AMD_GPU_MEMORY_FAULT_EVENT;
|
|
|
|
|
hsa_amd_gpu_memory_fault_info_t& fault_info = memory_fault_event.memory_fault;
|
2017-06-14 14:14:38 -04:00
|
|
|
|
2017-09-29 12:45:24 -04:00
|
|
|
// Find the faulty agent
|
|
|
|
|
auto it = runtime_singleton_->agents_by_node_.find(fault.NodeId);
|
|
|
|
|
assert(it != runtime_singleton_->agents_by_node_.end() && "Can't find faulty agent.");
|
|
|
|
|
Agent* faulty_agent = it->second.front();
|
2018-04-04 15:47:03 -04:00
|
|
|
fault_info.agent = Agent::Convert(faulty_agent);
|
2017-06-14 14:14:38 -04:00
|
|
|
|
2018-04-04 15:47:03 -04:00
|
|
|
fault_info.virtual_address = fault.VirtualAddress;
|
2019-02-19 22:06:27 -06:00
|
|
|
fault_info.fault_reason_mask = 0;
|
2017-06-14 14:14:38 -04:00
|
|
|
if (fault.Failure.NotPresent == 1) {
|
2019-02-19 22:06:27 -06:00
|
|
|
fault_info.fault_reason_mask |= HSA_AMD_MEMORY_FAULT_PAGE_NOT_PRESENT;
|
2017-09-29 12:45:24 -04:00
|
|
|
}
|
|
|
|
|
if (fault.Failure.ReadOnly == 1) {
|
2019-02-19 22:06:27 -06:00
|
|
|
fault_info.fault_reason_mask |= HSA_AMD_MEMORY_FAULT_READ_ONLY;
|
2017-09-29 12:45:24 -04:00
|
|
|
}
|
|
|
|
|
if (fault.Failure.NoExecute == 1) {
|
2019-02-19 22:06:27 -06:00
|
|
|
fault_info.fault_reason_mask |= HSA_AMD_MEMORY_FAULT_NX;
|
2017-09-29 12:45:24 -04:00
|
|
|
}
|
|
|
|
|
if (fault.Failure.GpuAccess == 1) {
|
2019-02-19 22:06:27 -06:00
|
|
|
fault_info.fault_reason_mask |= HSA_AMD_MEMORY_FAULT_HOST_ONLY;
|
2017-09-29 12:45:24 -04:00
|
|
|
}
|
|
|
|
|
if (fault.Failure.Imprecise == 1) {
|
2019-02-19 22:06:27 -06:00
|
|
|
fault_info.fault_reason_mask |= HSA_AMD_MEMORY_FAULT_IMPRECISE;
|
|
|
|
|
}
|
|
|
|
|
if (fault.Failure.ECC == 1 && fault.Failure.ErrorType == 0) {
|
|
|
|
|
fault_info.fault_reason_mask |= HSA_AMD_MEMORY_FAULT_DRAM_ECC;
|
|
|
|
|
}
|
|
|
|
|
if (fault.Failure.ErrorType == 1) {
|
|
|
|
|
fault_info.fault_reason_mask |= HSA_AMD_MEMORY_FAULT_SRAM_ECC;
|
|
|
|
|
}
|
|
|
|
|
if (fault.Failure.ErrorType == 2) {
|
|
|
|
|
fault_info.fault_reason_mask |= HSA_AMD_MEMORY_FAULT_DRAM_ECC;
|
|
|
|
|
}
|
|
|
|
|
if (fault.Failure.ErrorType == 3) {
|
|
|
|
|
fault_info.fault_reason_mask |= HSA_AMD_MEMORY_FAULT_HANG;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (auto& callback : system_event_handlers) {
|
|
|
|
|
hsa_status_t err = callback.first(&memory_fault_event, callback.second);
|
|
|
|
|
if (err == HSA_STATUS_SUCCESS) custom_handler_status = HSA_STATUS_SUCCESS;
|
2017-06-14 14:14:38 -04:00
|
|
|
}
|
|
|
|
|
}
|
2016-03-22 18:10:13 -05:00
|
|
|
|
2017-09-29 12:45:24 -04:00
|
|
|
// No custom VM fault handler registered or it failed.
|
|
|
|
|
if (custom_handler_status != HSA_STATUS_SUCCESS) {
|
|
|
|
|
if (runtime_singleton_->flag().enable_vm_fault_message()) {
|
|
|
|
|
std::string reason = "";
|
|
|
|
|
if (fault.Failure.NotPresent == 1) {
|
|
|
|
|
reason += "Page not present or supervisor privilege";
|
|
|
|
|
} else if (fault.Failure.ReadOnly == 1) {
|
|
|
|
|
reason += "Write access to a read-only page";
|
|
|
|
|
} else if (fault.Failure.NoExecute == 1) {
|
|
|
|
|
reason += "Execute access to a page marked NX";
|
|
|
|
|
} else if (fault.Failure.GpuAccess == 1) {
|
|
|
|
|
reason += "Host access only";
|
2019-02-19 22:06:27 -06:00
|
|
|
} else if ((fault.Failure.ECC == 1 && fault.Failure.ErrorType == 0) ||
|
|
|
|
|
fault.Failure.ErrorType == 2) {
|
|
|
|
|
reason += "DRAM ECC failure";
|
|
|
|
|
} else if (fault.Failure.ErrorType == 1) {
|
|
|
|
|
reason += "SRAM ECC failure";
|
|
|
|
|
} else if (fault.Failure.ErrorType == 3) {
|
|
|
|
|
reason += "Generic hang recovery";
|
2017-09-29 12:45:24 -04:00
|
|
|
} else {
|
|
|
|
|
reason += "Unknown";
|
|
|
|
|
}
|
2016-03-22 18:10:13 -05:00
|
|
|
|
2017-10-25 20:08:36 -05:00
|
|
|
core::Agent* faultingAgent = runtime_singleton_->agents_by_node_[fault.NodeId][0];
|
|
|
|
|
|
|
|
|
|
fprintf(
|
|
|
|
|
stderr,
|
|
|
|
|
"Memory access fault by GPU node-%u (Agent handle: %p) on address %p%s. Reason: %s.\n",
|
|
|
|
|
fault.NodeId, reinterpret_cast<void*>(faultingAgent->public_handle().handle),
|
|
|
|
|
reinterpret_cast<const void*>(fault.VirtualAddress),
|
|
|
|
|
(fault.Failure.Imprecise == 1) ? "(may not be exact address)" : "", reason.c_str());
|
|
|
|
|
|
|
|
|
|
#ifndef NDEBUG
|
|
|
|
|
runtime_singleton_->memory_lock_.Acquire();
|
|
|
|
|
auto it = runtime_singleton_->allocation_map_.upper_bound(
|
|
|
|
|
reinterpret_cast<void*>(fault.VirtualAddress));
|
|
|
|
|
for (int i = 0; i < 2; i++) {
|
|
|
|
|
if (it != runtime_singleton_->allocation_map_.begin()) it--;
|
|
|
|
|
}
|
|
|
|
|
fprintf(stderr, "Nearby memory map:\n");
|
|
|
|
|
auto start = it;
|
|
|
|
|
for (int i = 0; i < 3; i++) {
|
|
|
|
|
if (it == runtime_singleton_->allocation_map_.end()) break;
|
|
|
|
|
std::string kind = "Non-HSA";
|
|
|
|
|
if (it->second.region != nullptr) {
|
|
|
|
|
const amd::MemoryRegion* region =
|
|
|
|
|
static_cast<const amd::MemoryRegion*>(it->second.region);
|
|
|
|
|
if (region->IsSystem())
|
|
|
|
|
kind = "System";
|
|
|
|
|
else if (region->IsLocalMemory())
|
|
|
|
|
kind = "VRAM";
|
|
|
|
|
else if (region->IsScratch())
|
|
|
|
|
kind = "Scratch";
|
|
|
|
|
else if (region->IsLDS())
|
|
|
|
|
kind = "LDS";
|
|
|
|
|
}
|
|
|
|
|
fprintf(stderr, "%p, 0x%lx, %s\n", it->first, it->second.size, kind.c_str());
|
|
|
|
|
it++;
|
|
|
|
|
}
|
|
|
|
|
fprintf(stderr, "\n");
|
|
|
|
|
it = start;
|
|
|
|
|
runtime_singleton_->memory_lock_.Release();
|
|
|
|
|
hsa_amd_pointer_info_t info;
|
|
|
|
|
PtrInfoBlockData block;
|
|
|
|
|
uint32_t count;
|
|
|
|
|
hsa_agent_t* canAccess;
|
|
|
|
|
info.size = sizeof(info);
|
|
|
|
|
for (int i = 0; i < 3; i++) {
|
|
|
|
|
if (it == runtime_singleton_->allocation_map_.end()) break;
|
|
|
|
|
runtime_singleton_->PtrInfo(const_cast<void*>(it->first), &info, malloc, &count, &canAccess,
|
|
|
|
|
&block);
|
2018-01-30 18:13:47 -06:00
|
|
|
fprintf(stderr,
|
|
|
|
|
"PtrInfo:\n\tAddress: %p-%p/%p-%p\n\tSize: 0x%lx\n\tType: %u\n\tOwner: %p\n",
|
|
|
|
|
info.agentBaseAddress, (char*)info.agentBaseAddress + info.sizeInBytes,
|
|
|
|
|
info.hostBaseAddress, (char*)info.hostBaseAddress + info.sizeInBytes,
|
|
|
|
|
info.sizeInBytes, info.type, reinterpret_cast<void*>(info.agentOwner.handle));
|
2017-10-25 20:08:36 -05:00
|
|
|
fprintf(stderr, "\tCanAccess: %u\n", count);
|
|
|
|
|
for (int t = 0; t < count; t++)
|
|
|
|
|
fprintf(stderr, "\t\t%p\n", reinterpret_cast<void*>(canAccess[t].handle));
|
|
|
|
|
fprintf(stderr, "\tIn block: %p, 0x%lx\n", block.base, block.length);
|
|
|
|
|
free(canAccess);
|
|
|
|
|
it++;
|
|
|
|
|
}
|
|
|
|
|
#endif //! NDEBUG
|
2017-09-29 12:45:24 -04:00
|
|
|
}
|
2017-10-25 20:08:36 -05:00
|
|
|
assert(false && "GPU memory access fault.");
|
2017-09-29 12:45:24 -04:00
|
|
|
std::abort();
|
|
|
|
|
}
|
2017-06-14 14:14:38 -04:00
|
|
|
// No need to keep the signal because we are done.
|
2016-03-22 18:10:13 -05:00
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Runtime::Runtime()
|
2018-11-09 04:36:25 -06:00
|
|
|
: region_gpu_(nullptr),
|
2016-03-22 18:10:13 -05:00
|
|
|
sys_clock_freq_(0),
|
2017-08-15 23:43:04 -05:00
|
|
|
vm_fault_event_(nullptr),
|
|
|
|
|
vm_fault_signal_(nullptr),
|
2018-11-09 04:36:25 -06:00
|
|
|
ref_count_(0) {}
|
2016-03-22 18:10:13 -05:00
|
|
|
|
2016-08-22 17:27:00 -05:00
|
|
|
hsa_status_t Runtime::Load() {
|
2016-04-01 17:13:45 -05:00
|
|
|
flag_.Refresh();
|
|
|
|
|
|
|
|
|
|
g_use_interrupt_wait = flag_.enable_interrupt();
|
2016-03-22 18:10:13 -05:00
|
|
|
|
|
|
|
|
if (!amd::Load()) {
|
2016-08-22 17:27:00 -05:00
|
|
|
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
2016-03-22 18:10:13 -05:00
|
|
|
}
|
2018-11-09 03:07:10 -06:00
|
|
|
BindVmFaultHandler();
|
2016-03-22 18:10:13 -05:00
|
|
|
|
|
|
|
|
loader_ = amd::hsa::loader::Loader::Create(&loader_context_);
|
|
|
|
|
|
|
|
|
|
// Load extensions
|
|
|
|
|
LoadExtensions();
|
|
|
|
|
|
2018-10-19 17:07:24 -05:00
|
|
|
// Initialize per GPU scratch, blits, and trap handler
|
2016-05-31 13:38:45 -05:00
|
|
|
for (core::Agent* agent : gpu_agents_) {
|
2016-08-22 17:27:00 -05:00
|
|
|
hsa_status_t status =
|
2016-07-26 15:12:25 -05:00
|
|
|
reinterpret_cast<amd::GpuAgentInt*>(agent)->PostToolsInit();
|
2016-08-22 17:27:00 -05:00
|
|
|
|
|
|
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
|
|
|
return status;
|
|
|
|
|
}
|
2016-05-31 13:38:45 -05:00
|
|
|
}
|
2016-08-22 17:27:00 -05:00
|
|
|
|
2018-10-19 17:07:24 -05:00
|
|
|
// Load tools libraries
|
|
|
|
|
LoadTools();
|
|
|
|
|
|
2016-08-22 17:27:00 -05:00
|
|
|
return HSA_STATUS_SUCCESS;
|
2016-03-22 18:10:13 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void Runtime::Unload() {
|
|
|
|
|
UnloadTools();
|
|
|
|
|
UnloadExtensions();
|
|
|
|
|
|
|
|
|
|
amd::hsa::loader::Loader::Destroy(loader_);
|
|
|
|
|
loader_ = nullptr;
|
|
|
|
|
|
2018-02-28 05:56:39 -06:00
|
|
|
std::for_each(gpu_agents_.begin(), gpu_agents_.end(), DeleteObject());
|
|
|
|
|
gpu_agents_.clear();
|
|
|
|
|
|
2016-03-22 18:10:13 -05:00
|
|
|
async_events_control_.Shutdown();
|
|
|
|
|
|
2017-08-15 23:43:04 -05:00
|
|
|
if (vm_fault_signal_ != nullptr) {
|
|
|
|
|
vm_fault_signal_->DestroySignal();
|
|
|
|
|
vm_fault_signal_ = nullptr;
|
|
|
|
|
}
|
2016-03-22 18:10:13 -05:00
|
|
|
core::InterruptSignal::DestroyEvent(vm_fault_event_);
|
2017-08-15 23:43:04 -05:00
|
|
|
vm_fault_event_ = nullptr;
|
2016-03-22 18:10:13 -05:00
|
|
|
|
2018-11-08 19:09:24 -06:00
|
|
|
SharedSignalPool.clear();
|
|
|
|
|
|
2018-11-09 05:57:36 -06:00
|
|
|
EventPool.clear();
|
|
|
|
|
|
2016-03-22 18:10:13 -05:00
|
|
|
DestroyAgents();
|
|
|
|
|
|
|
|
|
|
CloseTools();
|
|
|
|
|
|
|
|
|
|
amd::Unload();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void Runtime::LoadExtensions() {
|
|
|
|
|
// Load finalizer and extension library
|
|
|
|
|
#ifdef HSA_LARGE_MODEL
|
|
|
|
|
static const std::string kFinalizerLib[] = {"hsa-ext-finalize64.dll",
|
|
|
|
|
"libhsa-ext-finalize64.so.1"};
|
|
|
|
|
static const std::string kImageLib[] = {"hsa-ext-image64.dll",
|
|
|
|
|
"libhsa-ext-image64.so.1"};
|
|
|
|
|
#else
|
|
|
|
|
static const std::string kFinalizerLib[] = {"hsa-ext-finalize.dll",
|
|
|
|
|
"libhsa-ext-finalize.so.1"};
|
|
|
|
|
static const std::string kImageLib[] = {"hsa-ext-image.dll",
|
|
|
|
|
"libhsa-ext-image.so.1"};
|
|
|
|
|
#endif
|
2016-06-10 17:31:13 -05:00
|
|
|
|
|
|
|
|
// Update Hsa Api Table with handle of Image extension Apis
|
|
|
|
|
extensions_.LoadFinalizer(kFinalizerLib[os_index(os::current_os)]);
|
2017-02-04 15:08:07 -06:00
|
|
|
hsa_api_table_.LinkExts(&extensions_.finalizer_api,
|
|
|
|
|
core::HsaApiTable::HSA_EXT_FINALIZER_API_TABLE_ID);
|
2016-06-10 17:31:13 -05:00
|
|
|
|
|
|
|
|
// Update Hsa Api Table with handle of Finalizer extension Apis
|
|
|
|
|
extensions_.LoadImage(kImageLib[os_index(os::current_os)]);
|
2017-02-04 15:08:07 -06:00
|
|
|
hsa_api_table_.LinkExts(&extensions_.image_api,
|
|
|
|
|
core::HsaApiTable::HSA_EXT_IMAGE_API_TABLE_ID);
|
2016-03-22 18:10:13 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void Runtime::UnloadExtensions() { extensions_.Unload(); }
|
|
|
|
|
|
|
|
|
|
static std::vector<std::string> parse_tool_names(std::string tool_names) {
|
|
|
|
|
std::vector<std::string> names;
|
|
|
|
|
std::string name = "";
|
|
|
|
|
bool quoted = false;
|
|
|
|
|
while (tool_names.size() != 0) {
|
|
|
|
|
auto index = tool_names.find_first_of(" \"\\");
|
|
|
|
|
if (index == std::string::npos) {
|
|
|
|
|
name += tool_names;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
switch (tool_names[index]) {
|
|
|
|
|
case ' ': {
|
|
|
|
|
if (!quoted) {
|
|
|
|
|
name += tool_names.substr(0, index);
|
|
|
|
|
tool_names.erase(0, index + 1);
|
|
|
|
|
names.push_back(name);
|
|
|
|
|
name = "";
|
|
|
|
|
} else {
|
|
|
|
|
name += tool_names.substr(0, index + 1);
|
|
|
|
|
tool_names.erase(0, index + 1);
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
case '\"': {
|
|
|
|
|
if (quoted) {
|
|
|
|
|
quoted = false;
|
|
|
|
|
name += tool_names.substr(0, index);
|
|
|
|
|
tool_names.erase(0, index + 1);
|
|
|
|
|
names.push_back(name);
|
|
|
|
|
name = "";
|
|
|
|
|
} else {
|
|
|
|
|
quoted = true;
|
|
|
|
|
tool_names.erase(0, index + 1);
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
case '\\': {
|
|
|
|
|
if (tool_names.size() > index + 1) {
|
|
|
|
|
name += tool_names.substr(0, index) + tool_names[index + 1];
|
|
|
|
|
tool_names.erase(0, index + 2);
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
} // end switch
|
|
|
|
|
} // end while
|
|
|
|
|
|
|
|
|
|
if (name != "") names.push_back(name);
|
|
|
|
|
return names;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void Runtime::LoadTools() {
|
2016-06-10 17:31:13 -05:00
|
|
|
typedef bool (*tool_init_t)(::HsaApiTable*, uint64_t, uint64_t,
|
2016-03-22 18:10:13 -05:00
|
|
|
const char* const*);
|
|
|
|
|
typedef Agent* (*tool_wrap_t)(Agent*);
|
|
|
|
|
typedef void (*tool_add_t)(Runtime*);
|
|
|
|
|
|
|
|
|
|
// Load tool libs
|
2016-04-01 17:13:45 -05:00
|
|
|
std::string tool_names = flag_.tools_lib_names();
|
2016-03-22 18:10:13 -05:00
|
|
|
if (tool_names != "") {
|
|
|
|
|
std::vector<std::string> names = parse_tool_names(tool_names);
|
|
|
|
|
std::vector<const char*> failed;
|
2017-11-22 19:11:09 -06:00
|
|
|
for (auto& name : names) {
|
|
|
|
|
os::LibHandle tool = os::LoadLib(name);
|
2016-03-22 18:10:13 -05:00
|
|
|
|
|
|
|
|
if (tool != NULL) {
|
|
|
|
|
tool_libs_.push_back(tool);
|
|
|
|
|
|
|
|
|
|
tool_init_t ld;
|
|
|
|
|
ld = (tool_init_t)os::GetExportAddress(tool, "OnLoad");
|
|
|
|
|
if (ld) {
|
2016-06-10 17:31:13 -05:00
|
|
|
if (!ld(&hsa_api_table_.hsa_api,
|
|
|
|
|
hsa_api_table_.hsa_api.version.major_id,
|
|
|
|
|
failed.size(), &failed[0])) {
|
2017-11-22 19:11:09 -06:00
|
|
|
failed.push_back(name.c_str());
|
2016-03-22 18:10:13 -05:00
|
|
|
os::CloseLib(tool);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
tool_wrap_t wrap;
|
|
|
|
|
wrap = (tool_wrap_t)os::GetExportAddress(tool, "WrapAgent");
|
|
|
|
|
if (wrap) {
|
|
|
|
|
std::vector<core::Agent*>* agent_lists[2] = {&cpu_agents_,
|
|
|
|
|
&gpu_agents_};
|
|
|
|
|
for (std::vector<core::Agent*>* agent_list : agent_lists) {
|
|
|
|
|
for (size_t agent_idx = 0; agent_idx < agent_list->size();
|
|
|
|
|
++agent_idx) {
|
|
|
|
|
Agent* agent = wrap(agent_list->at(agent_idx));
|
|
|
|
|
if (agent != NULL) {
|
|
|
|
|
assert(agent->IsValid() &&
|
|
|
|
|
"Agent returned from WrapAgent is not valid");
|
|
|
|
|
agent_list->at(agent_idx) = agent;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
tool_add_t add;
|
|
|
|
|
add = (tool_add_t)os::GetExportAddress(tool, "AddAgent");
|
|
|
|
|
if (add) add(this);
|
|
|
|
|
}
|
2017-08-09 21:46:00 -05:00
|
|
|
else {
|
|
|
|
|
if (flag().report_tool_load_failures())
|
2018-05-03 14:01:59 -05:00
|
|
|
fprintf(stderr, "Tool lib \"%s\" failed to load.\n", name.c_str());
|
2017-08-09 21:46:00 -05:00
|
|
|
}
|
2016-03-22 18:10:13 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void Runtime::UnloadTools() {
|
|
|
|
|
typedef void (*tool_unload_t)();
|
|
|
|
|
for (size_t i = tool_libs_.size(); i != 0; i--) {
|
|
|
|
|
tool_unload_t unld;
|
|
|
|
|
unld = (tool_unload_t)os::GetExportAddress(tool_libs_[i - 1], "OnUnload");
|
|
|
|
|
if (unld) unld();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Reset API table in case some tool doesn't cleanup properly
|
|
|
|
|
hsa_api_table_.Reset();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void Runtime::CloseTools() {
|
|
|
|
|
// Due to valgrind bug, runtime cannot dlclose extensions see:
|
|
|
|
|
// http://valgrind.org/docs/manual/faq.html#faq.unhelpful
|
2016-04-01 17:13:45 -05:00
|
|
|
if (!flag_.running_valgrind()) {
|
2017-11-22 19:11:09 -06:00
|
|
|
for (auto& lib : tool_libs_) os::CloseLib(lib);
|
2016-03-22 18:10:13 -05:00
|
|
|
}
|
|
|
|
|
tool_libs_.clear();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void Runtime::AsyncEventsControl::Shutdown() {
|
|
|
|
|
if (async_events_thread_ != NULL) {
|
|
|
|
|
exit = true;
|
|
|
|
|
hsa_signal_handle(wake)->StoreRelaxed(1);
|
|
|
|
|
os::WaitForThread(async_events_thread_);
|
|
|
|
|
os::CloseThread(async_events_thread_);
|
|
|
|
|
async_events_thread_ = NULL;
|
|
|
|
|
HSA::hsa_signal_destroy(wake);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void Runtime::AsyncEvents::PushBack(hsa_signal_t signal,
|
|
|
|
|
hsa_signal_condition_t cond,
|
|
|
|
|
hsa_signal_value_t value,
|
|
|
|
|
hsa_amd_signal_handler handler, void* arg) {
|
|
|
|
|
signal_.push_back(signal);
|
|
|
|
|
cond_.push_back(cond);
|
|
|
|
|
value_.push_back(value);
|
|
|
|
|
handler_.push_back(handler);
|
|
|
|
|
arg_.push_back(arg);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void Runtime::AsyncEvents::CopyIndex(size_t dst, size_t src) {
|
|
|
|
|
signal_[dst] = signal_[src];
|
|
|
|
|
cond_[dst] = cond_[src];
|
|
|
|
|
value_[dst] = value_[src];
|
|
|
|
|
handler_[dst] = handler_[src];
|
|
|
|
|
arg_[dst] = arg_[src];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
size_t Runtime::AsyncEvents::Size() { return signal_.size(); }
|
|
|
|
|
|
|
|
|
|
void Runtime::AsyncEvents::PopBack() {
|
|
|
|
|
signal_.pop_back();
|
|
|
|
|
cond_.pop_back();
|
|
|
|
|
value_.pop_back();
|
|
|
|
|
handler_.pop_back();
|
|
|
|
|
arg_.pop_back();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void Runtime::AsyncEvents::Clear() {
|
|
|
|
|
signal_.clear();
|
|
|
|
|
cond_.clear();
|
|
|
|
|
value_.clear();
|
|
|
|
|
handler_.clear();
|
|
|
|
|
arg_.clear();
|
|
|
|
|
}
|
|
|
|
|
|
2018-04-04 15:47:03 -04:00
|
|
|
hsa_status_t Runtime::SetCustomSystemEventHandler(hsa_amd_system_event_callback_t callback,
|
|
|
|
|
void* data) {
|
2019-02-19 22:06:27 -06:00
|
|
|
ScopedAcquire<KernelMutex> lock(&system_event_lock_);
|
|
|
|
|
system_event_handlers_.push_back(
|
|
|
|
|
std::make_pair(AMD::callback_t<hsa_amd_system_event_callback_t>(callback), data));
|
|
|
|
|
return HSA_STATUS_SUCCESS;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::vector<std::pair<AMD::callback_t<hsa_amd_system_event_callback_t>, void*>>
|
|
|
|
|
Runtime::GetSystemEventHandlers() {
|
|
|
|
|
ScopedAcquire<KernelMutex> lock(&system_event_lock_);
|
|
|
|
|
return system_event_handlers_;
|
2017-09-29 12:45:24 -04:00
|
|
|
}
|
2018-04-04 15:47:03 -04:00
|
|
|
|
2018-10-19 18:30:23 -05:00
|
|
|
hsa_status_t Runtime::SetInternalQueueCreateNotifier(hsa_amd_runtime_queue_notifier callback,
|
|
|
|
|
void* user_data) {
|
|
|
|
|
if (internal_queue_create_notifier_) {
|
|
|
|
|
return HSA_STATUS_ERROR;
|
|
|
|
|
} else {
|
|
|
|
|
internal_queue_create_notifier_ = callback;
|
|
|
|
|
internal_queue_create_notifier_user_data_ = user_data;
|
|
|
|
|
return HSA_STATUS_SUCCESS;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void Runtime::InternalQueueCreateNotify(const hsa_queue_t* queue, hsa_agent_t agent) {
|
|
|
|
|
if (internal_queue_create_notifier_)
|
|
|
|
|
internal_queue_create_notifier_(queue, agent, internal_queue_create_notifier_user_data_);
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-22 18:10:13 -05:00
|
|
|
} // namespace core
|