rocr: Generalize AMD::MemoryRegion Allocate and Free
Remove KFD-specific Allocate/Free calls from the AMD::MemoryRegion. The KFD-driver-specific Allocate/Free calls are now implemented in the KfdDriver. Future changes will migrate the remaining KFD-specific calls out of AMD::MemoryRegion. This allows the MemoryRegion to be used across AMD drivers like the XDNA driver. Change-Id: Ib6a2a9e5e1a15e61644d2592beb3a8e6578c3010
Этот коммит содержится в:
@@ -49,6 +49,10 @@
|
||||
|
||||
#include "hsakmt/hsakmt.h"
|
||||
|
||||
#include "core/inc/amd_cpu_agent.h"
|
||||
#include "core/inc/amd_gpu_agent.h"
|
||||
#include "core/inc/amd_memory_region.h"
|
||||
#include "core/inc/exceptions.h"
|
||||
#include "core/inc/runtime.h"
|
||||
|
||||
namespace rocr {
|
||||
@@ -70,18 +74,155 @@ hsa_status_t KfdDriver::QueryKernelModeDriver(core::DriverQuery query) {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
hsa_status_t KfdDriver::GetMemoryProperties(uint32_t node_id,
|
||||
core::MemProperties &mprops) const {
|
||||
/// @brief Report the memory properties of a node into @p mem_region.
///
/// This is currently a placeholder: neither @p node_id nor @p mem_region is
/// read or modified, and success is returned unconditionally.
///
/// @param[in] node_id Node identifier (unused here).
/// @param[in, out] mem_region Region object (left untouched here).
/// @retval HSA_STATUS_SUCCESS always.
hsa_status_t
KfdDriver::GetMemoryProperties(uint32_t node_id,
                               core::MemoryRegion &mem_region) const {
  // Nothing is queried from the driver in this implementation.
  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
hsa_status_t KfdDriver::AllocateMemory(void **mem, size_t size,
|
||||
uint32_t node_id, core::MemFlags flags) {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
/// @brief Allocate memory for @p mem_region through the KFD thunk interface.
///
/// The runtime-level @p alloc_flags are translated into HsaMemFlags on top of
/// the region's default flags. Eligible local-memory requests are served from
/// the region's fragment allocator; everything else is allocated with
/// AllocateKfdMemory() (retried once after trimming the owner agent) and then
/// mapped to the relevant GPU nodes.
///
/// @param[in] mem_region Region to allocate from; it is downcast to
/// AMD::MemoryRegion.
/// @param[in] alloc_flags Combination of core::MemoryRegion::Allocate* bits.
/// @param[out] mem Receives the allocated address. It is reset to nullptr on
/// the failure paths that release the allocation again.
/// @param[in] size Size forwarded to the allocator and to the mapping call.
/// @param[in] agent_node_id Node used for the KFD allocation when
/// AllocateGTTAccess is requested; otherwise the region owner's node is used.
///
/// @retval HSA_STATUS_SUCCESS if the memory was allocated (and mapped where
/// mapping is required).
/// @retval HSA_STATUS_ERROR_OUT_OF_RESOURCES if allocation, required pinning,
/// or the ASAN header page replacement failed.
hsa_status_t
KfdDriver::AllocateMemory(const core::MemoryRegion &mem_region,
                          core::MemoryRegion::AllocateFlags alloc_flags,
                          void **mem, size_t size, uint32_t agent_node_id) {
  const MemoryRegion &m_region = static_cast<const MemoryRegion &>(mem_region);

  // Start from the region's default flags, then apply the per-request bits.
  HsaMemFlags kmt_alloc_flags = m_region.mem_flags();

  kmt_alloc_flags.ui32.ExecuteAccess =
      (alloc_flags & core::MemoryRegion::AllocateExecutable) ? 1 : 0;
  kmt_alloc_flags.ui32.AQLQueueMemory =
      (alloc_flags & core::MemoryRegion::AllocateDoubleMap) ? 1 : 0;

  if (m_region.IsSystem() &&
      (alloc_flags & core::MemoryRegion::AllocateNonPaged)) {
    kmt_alloc_flags.ui32.NonPaged = 1;
  }

  // Allocating a memory handle for virtual memory.
  kmt_alloc_flags.ui32.NoAddress =
      (alloc_flags & core::MemoryRegion::AllocateMemoryOnly) ? 1 : 0;

  // Allocate pseudo fine grain memory.
  if (alloc_flags & core::MemoryRegion::AllocatePCIeRW) {
    kmt_alloc_flags.ui32.CoarseGrain = 0;
  }

  if (alloc_flags & core::MemoryRegion::AllocatePinned) {
    kmt_alloc_flags.ui32.NoSubstitute = 1;
  }

  if (alloc_flags & core::MemoryRegion::AllocateGTTAccess) {
    kmt_alloc_flags.ui32.GTTAccess = 1;
  }

  // Allocate physically contiguous memory (local memory only). The
  // AllocateKfdMemory call will fail if this flag is not supported in KFD.
  if (m_region.IsLocalMemory() &&
      (alloc_flags & core::MemoryRegion::AllocateContiguous)) {
    kmt_alloc_flags.ui32.Contiguous = 1;
  }

  // Only allow using the suballocator for ordinary VRAM.
  if (m_region.IsLocalMemory() && !kmt_alloc_flags.ui32.NoAddress) {
    const bool fragment_alloc_enabled =
        !core::Runtime::runtime_singleton_->flag().disable_fragment_alloc();
    // Avoid modifying executable or queue allocations: any flag other than
    // AllocateRestrict bypasses the suballocator.
    const bool only_restrict_requested =
        ((alloc_flags & (~core::MemoryRegion::AllocateRestrict)) == 0);

    if (fragment_alloc_enabled && only_restrict_requested) {
      *mem = m_region.fragment_alloc(size);

      if ((alloc_flags & core::MemoryRegion::AllocateAsan) &&
          hsaKmtReplaceAsanHeaderPage(*mem) != HSAKMT_STATUS_SUCCESS) {
        m_region.fragment_free(*mem);
        *mem = nullptr;
        return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
      }

      return HSA_STATUS_SUCCESS;
    }
  }

  const uint32_t alloc_node_id =
      (alloc_flags & core::MemoryRegion::AllocateGTTAccess)
          ? agent_node_id
          : m_region.owner()->node_id();

  // Allocate memory. If it fails, attempt to release memory from the block
  // allocator and retry once.
  *mem = AllocateKfdMemory(kmt_alloc_flags, alloc_node_id, size);
  if (*mem == nullptr) {
    m_region.owner()->Trim();
    *mem = AllocateKfdMemory(kmt_alloc_flags, alloc_node_id, size);
  }

  if (*mem == nullptr) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  // A handle-only allocation has no address to map.
  if (kmt_alloc_flags.ui32.NoAddress) {
    return HSA_STATUS_SUCCESS;
  }

  // Commit the memory.
  // For system memory, on non-restricted allocation, map it to all GPUs. On
  // restricted allocation, only CPU is allowed to access by default, so
  // no need to map.
  // For local memory, only map it to the owning GPU. Mapping to other GPU,
  // if the access is allowed, is performed on AllowAccess.
  HsaMemMapFlags map_flag = m_region.map_flags();
  size_t map_node_count = 1;
  const uint32_t owner_node_id = m_region.owner()->node_id();
  const uint32_t *map_node_id = &owner_node_id;

  if (m_region.IsSystem()) {
    if ((alloc_flags & core::MemoryRegion::AllocateRestrict) != 0) {
      // No need to pin it for CPU exclusive access.
      return HSA_STATUS_SUCCESS;
    }

    // Map to all GPU agents.
    map_node_count = core::Runtime::runtime_singleton_->gpu_ids().size();
    if (map_node_count == 0) {
      // No need to pin since no GPU in the platform.
      return HSA_STATUS_SUCCESS;
    }

    map_node_id = &core::Runtime::runtime_singleton_->gpu_ids()[0];
  }

  uint64_t alternate_va = 0;
  const bool is_resident = MakeKfdMemoryResident(
      map_node_count, map_node_id, *mem, size, &alternate_va, map_flag);

  const bool require_pinning =
      (!m_region.full_profile() || m_region.IsLocalMemory() ||
       m_region.IsScratch());

  if (require_pinning && !is_resident) {
    FreeKfdMemory(*mem, size);
    *mem = nullptr;
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  if ((alloc_flags & core::MemoryRegion::AllocateAsan) &&
      hsaKmtReplaceAsanHeaderPage(*mem) != HSAKMT_STATUS_SUCCESS) {
    FreeKfdMemory(*mem, size);
    *mem = nullptr;
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
hsa_status_t KfdDriver::FreeMemory(void *mem, uint32_t node_id) {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
/// @brief Unmap @p mem from the GPU and release it back to KFD.
///
/// @param[in] mem Address to release.
/// @param[in] size Size passed on to FreeKfdMemory().
/// @retval HSA_STATUS_SUCCESS if FreeKfdMemory() reports success.
/// @retval HSA_STATUS_ERROR otherwise.
hsa_status_t KfdDriver::FreeMemory(void *mem, size_t size) {
  // The mapping is dropped first; its result is not checked.
  MakeKfdMemoryUnresident(mem);

  if (!FreeKfdMemory(mem, size)) {
    return HSA_STATUS_ERROR;
  }
  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
hsa_status_t KfdDriver::CreateQueue(core::Queue &queue) {
|
||||
@@ -92,5 +233,45 @@ hsa_status_t KfdDriver::DestroyQueue(core::Queue &queue) const {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
void *KfdDriver::AllocateKfdMemory(const HsaMemFlags &flags, uint32_t node_id,
|
||||
size_t size) {
|
||||
void *mem = nullptr;
|
||||
const HSAKMT_STATUS status = hsaKmtAllocMemory(node_id, size, flags, &mem);
|
||||
return (status == HSAKMT_STATUS_SUCCESS) ? mem : nullptr;
|
||||
}
|
||||
|
||||
bool KfdDriver::FreeKfdMemory(void *mem, size_t size) {
|
||||
if (mem == nullptr || size == 0) {
|
||||
debug_print("Invalid free ptr:%p size:%lu\n", mem, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (hsaKmtFreeMemory(mem, size) != HSAKMT_STATUS_SUCCESS) {
|
||||
debug_print("Failed to free ptr:%p size:%lu\n", mem, size);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool KfdDriver::MakeKfdMemoryResident(size_t num_node, const uint32_t *nodes,
|
||||
const void *mem, size_t size,
|
||||
uint64_t *alternate_va,
|
||||
HsaMemMapFlags map_flag) {
|
||||
assert(num_node > 0);
|
||||
assert(nodes);
|
||||
|
||||
*alternate_va = 0;
|
||||
|
||||
HSAKMT_STATUS kmt_status(hsaKmtMapMemoryToGPUNodes(
|
||||
const_cast<void *>(mem), size, alternate_va, map_flag, num_node,
|
||||
const_cast<uint32_t *>(nodes)));
|
||||
|
||||
return (kmt_status == HSAKMT_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
/// @brief Unmap memory from the GPU with hsaKmtUnmapMemoryToGPU.
///
/// The status returned by the thunk call is not inspected.
///
/// @param[in] mem Address to unmap.
void KfdDriver::MakeKfdMemoryUnresident(const void *mem) {
  // The thunk API takes a non-const pointer.
  void *const target_mem = const_cast<void *>(mem);
  hsaKmtUnmapMemoryToGPU(target_mem);
}
|
||||
|
||||
} // namespace AMD
|
||||
} // namespace rocr
|
||||
|
||||
@@ -47,6 +47,7 @@
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "core/inc/amd_memory_region.h"
|
||||
#include "core/inc/runtime.h"
|
||||
#include "uapi/amdxdna_accel.h"
|
||||
|
||||
@@ -89,17 +90,18 @@ hsa_status_t XdnaDriver::QueryKernelModeDriver(core::DriverQuery query) {
|
||||
|
||||
hsa_status_t
|
||||
XdnaDriver::GetMemoryProperties(uint32_t node_id,
|
||||
core::MemProperties &mprops) const {
|
||||
core::MemoryRegion &mem_region) const {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
hsa_status_t XdnaDriver::AllocateMemory(void **mem, size_t size,
|
||||
uint32_t node_id,
|
||||
core::MemFlags flags) {
|
||||
/// @brief Allocate memory for @p mem_region through the XDNA driver.
///
/// This is currently a placeholder: no argument is read, @p mem is not
/// written, and success is returned unconditionally.
///
/// @retval HSA_STATUS_SUCCESS always.
hsa_status_t
XdnaDriver::AllocateMemory(const core::MemoryRegion &mem_region,
                           core::MemoryRegion::AllocateFlags alloc_flags,
                           void **mem, size_t size, uint32_t node_id) {
  // No allocation is performed in this implementation.
  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
hsa_status_t XdnaDriver::FreeMemory(void *mem, uint32_t node_id) {
|
||||
/// @brief Free memory through the XDNA driver.
///
/// This is currently a placeholder: neither @p mem nor @p size is used and
/// success is returned unconditionally.
///
/// @retval HSA_STATUS_SUCCESS always.
hsa_status_t XdnaDriver::FreeMemory(void *mem, size_t size) {
  // Nothing is released in this implementation.
  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
|
||||
@@ -49,11 +49,12 @@
|
||||
#include <vector>
|
||||
|
||||
#include "core/inc/checked.h"
|
||||
#include "core/inc/driver.h"
|
||||
#include "core/inc/isa.h"
|
||||
#include "core/inc/queue.h"
|
||||
#include "core/inc/memory_region.h"
|
||||
#include "core/util/utils.h"
|
||||
#include "core/inc/queue.h"
|
||||
#include "core/util/locks.h"
|
||||
#include "core/util/utils.h"
|
||||
|
||||
namespace rocr {
|
||||
|
||||
@@ -117,19 +118,18 @@ class Agent : public Checked<0xF6BC25EB17E6F917> {
|
||||
// @brief Agent class constructor.
|
||||
//
|
||||
// @param [in] type CPU or GPU or other.
|
||||
explicit Agent(uint32_t node_id, DeviceType type)
|
||||
: node_id_(node_id),
|
||||
device_type_(uint32_t(type)),
|
||||
profiling_enabled_(false),
|
||||
enabled_(false) {
|
||||
explicit Agent(DriverType drv_type, uint32_t node_id, DeviceType type)
|
||||
: driver_type(drv_type), node_id_(node_id), device_type_(uint32_t(type)),
|
||||
profiling_enabled_(false), enabled_(false) {
|
||||
public_handle_ = Convert(this);
|
||||
}
|
||||
|
||||
// @brief Agent class constructor.
|
||||
//
|
||||
// @param [in] type CPU or GPU or other.
|
||||
explicit Agent(uint32_t node_id, uint32_t type)
|
||||
: node_id_(node_id), device_type_(type), profiling_enabled_(false) {
|
||||
explicit Agent(DriverType drv_type, uint32_t node_id, uint32_t type)
|
||||
: driver_type(drv_type), node_id_(node_id), device_type_(type),
|
||||
profiling_enabled_(false) {
|
||||
public_handle_ = Convert(this);
|
||||
}
|
||||
|
||||
@@ -315,7 +315,9 @@ class Agent : public Checked<0xF6BC25EB17E6F917> {
|
||||
for (auto region : regions()) region->Trim();
|
||||
}
|
||||
|
||||
protected:
|
||||
const DriverType driver_type;
|
||||
|
||||
protected:
|
||||
// Intention here is to have a polymorphic update procedure for public_handle_
|
||||
// which is callable on any Agent* but only from some class derived from
|
||||
// Agent*. do_set_public_handle should remain protected or private in all
|
||||
|
||||
@@ -51,15 +51,16 @@
|
||||
|
||||
#include "hsakmt/hsakmt.h"
|
||||
|
||||
#include "core/inc/runtime.h"
|
||||
#include "core/inc/agent.h"
|
||||
#include "core/inc/blit.h"
|
||||
#include "core/inc/signal.h"
|
||||
#include "core/inc/cache.h"
|
||||
#include "core/inc/driver.h"
|
||||
#include "core/inc/runtime.h"
|
||||
#include "core/inc/scratch_cache.h"
|
||||
#include "core/util/small_heap.h"
|
||||
#include "core/util/locks.h"
|
||||
#include "core/inc/signal.h"
|
||||
#include "core/util/lazy_ptr.h"
|
||||
#include "core/util/locks.h"
|
||||
#include "core/util/small_heap.h"
|
||||
#include "pcs/pcs_runtime.h"
|
||||
|
||||
namespace rocr {
|
||||
@@ -72,142 +73,154 @@ typedef ScratchCache::ScratchInfo ScratchInfo;
|
||||
class GpuAgentInt : public core::Agent {
|
||||
public:
|
||||
// @brief Constructor
|
||||
GpuAgentInt(uint32_t node_id)
|
||||
: core::Agent(node_id,core::Agent::DeviceType::kAmdGpuDevice) {}
|
||||
GpuAgentInt(uint32_t node_id)
|
||||
: core::Agent(core::DriverType::KFD, node_id,
|
||||
core::Agent::DeviceType::kAmdGpuDevice) {}
|
||||
|
||||
// @brief Ensure blits are ready (performance hint).
|
||||
virtual void PreloadBlits() {}
|
||||
// @brief Ensure blits are ready (performance hint).
|
||||
virtual void PreloadBlits() {}
|
||||
|
||||
// @brief Initialization hook invoked after tools library has loaded,
|
||||
// to allow tools interception of interface functions.
|
||||
//
|
||||
// @retval HSA_STATUS_SUCCESS if initialization is successful.
|
||||
virtual hsa_status_t PostToolsInit() = 0;
|
||||
// @brief Initialization hook invoked after tools library has loaded,
|
||||
// to allow tools interception of interface functions.
|
||||
//
|
||||
// @retval HSA_STATUS_SUCCESS if initialization is successful.
|
||||
virtual hsa_status_t PostToolsInit() = 0;
|
||||
|
||||
// @brief Invoke the user provided callback for each region accessible by
|
||||
// this agent.
|
||||
//
|
||||
// @param [in] include_peer If true, the callback will be also invoked on each
|
||||
// peer memory region accessible by this agent. If false, only invoke the
|
||||
// callback on memory region owned by this agent.
|
||||
// @param [in] callback User provided callback function.
|
||||
// @param [in] data User provided pointer as input for @p callback.
|
||||
//
|
||||
// @retval ::HSA_STATUS_SUCCESS if the callback function for each traversed
|
||||
// region returns ::HSA_STATUS_SUCCESS.
|
||||
virtual hsa_status_t VisitRegion(bool include_peer,
|
||||
hsa_status_t (*callback)(hsa_region_t region,
|
||||
void* data),
|
||||
void* data) const = 0;
|
||||
// @brief Invoke the user provided callback for each region accessible by
|
||||
// this agent.
|
||||
//
|
||||
// @param [in] include_peer If true, the callback will be also invoked on
|
||||
// each peer memory region accessible by this agent. If false, only invoke
|
||||
// the callback on memory region owned by this agent.
|
||||
// @param [in] callback User provided callback function.
|
||||
// @param [in] data User provided pointer as input for @p callback.
|
||||
//
|
||||
// @retval ::HSA_STATUS_SUCCESS if the callback function for each traversed
|
||||
// region returns ::HSA_STATUS_SUCCESS.
|
||||
virtual hsa_status_t
|
||||
VisitRegion(bool include_peer,
|
||||
hsa_status_t (*callback)(hsa_region_t region, void *data),
|
||||
void *data) const = 0;
|
||||
|
||||
// @brief Carve scratch memory for main from scratch pool.
|
||||
//
|
||||
// @param [in/out] scratch Structure to be populated with the carved memory
|
||||
// information.
|
||||
virtual void AcquireQueueMainScratch(ScratchInfo& scratch) = 0;
|
||||
// @brief Carve scratch memory for main from scratch pool.
|
||||
//
|
||||
// @param [in/out] scratch Structure to be populated with the carved memory
|
||||
// information.
|
||||
virtual void AcquireQueueMainScratch(ScratchInfo &scratch) = 0;
|
||||
|
||||
// @brief Carve scratch memory for alt from scratch pool.
|
||||
//
|
||||
// @param [in/out] scratch Structure to be populated with the carved memory
|
||||
// information.
|
||||
virtual void AcquireQueueAltScratch(ScratchInfo& scratch) = 0;
|
||||
// @brief Carve scratch memory for alt from scratch pool.
|
||||
//
|
||||
// @param [in/out] scratch Structure to be populated with the carved memory
|
||||
// information.
|
||||
virtual void AcquireQueueAltScratch(ScratchInfo &scratch) = 0;
|
||||
|
||||
// @brief Release scratch memory from main back to scratch pool.
|
||||
//
|
||||
// @param [in/out] scratch Scratch memory previously acquired with call to
|
||||
// ::AcquireQueueMainScratch.
|
||||
virtual void ReleaseQueueMainScratch(ScratchInfo& base) = 0;
|
||||
// @brief Release scratch memory from main back to scratch pool.
|
||||
//
|
||||
// @param [in/out] scratch Scratch memory previously acquired with call to
|
||||
// ::AcquireQueueMainScratch.
|
||||
virtual void ReleaseQueueMainScratch(ScratchInfo &base) = 0;
|
||||
|
||||
// @brief Release scratch memory back from alternate to scratch pool.
|
||||
//
|
||||
// @param [in/out] scratch Scratch memory previously acquired with call to
|
||||
// ::AcquireQueueAltScratch.
|
||||
virtual void ReleaseQueueAltScratch(ScratchInfo& base) = 0;
|
||||
// @brief Release scratch memory back from alternate to scratch pool.
|
||||
//
|
||||
// @param [in/out] scratch Scratch memory previously acquired with call to
|
||||
// ::AcquireQueueAltScratch.
|
||||
virtual void ReleaseQueueAltScratch(ScratchInfo &base) = 0;
|
||||
|
||||
// @brief Translate the kernel start and end dispatch timestamp from agent
|
||||
// domain to host domain.
|
||||
//
|
||||
// @param [in] signal Pointer to signal that provides the dispatch timing.
|
||||
// @param [out] time Structure to be populated with the host domain value.
|
||||
virtual void TranslateTime(core::Signal* signal,
|
||||
hsa_amd_profiling_dispatch_time_t& time) = 0;
|
||||
// @brief Translate the kernel start and end dispatch timestamp from agent
|
||||
// domain to host domain.
|
||||
//
|
||||
// @param [in] signal Pointer to signal that provides the dispatch timing.
|
||||
// @param [out] time Structure to be populated with the host domain value.
|
||||
virtual void TranslateTime(core::Signal *signal,
|
||||
hsa_amd_profiling_dispatch_time_t &time) = 0;
|
||||
|
||||
// @brief Translate the async copy start and end timestamp from agent
|
||||
// domain to host domain.
|
||||
//
|
||||
// @param [in] signal Pointer to signal that provides the async copy timing.
|
||||
// @param [out] time Structure to be populated with the host domain value.
|
||||
virtual void TranslateTime(core::Signal* signal, hsa_amd_profiling_async_copy_time_t& time) = 0;
|
||||
// @brief Translate the async copy start and end timestamp from agent
|
||||
// domain to host domain.
|
||||
//
|
||||
// @param [in] signal Pointer to signal that provides the async copy timing.
|
||||
// @param [out] time Structure to be populated with the host domain value.
|
||||
virtual void TranslateTime(core::Signal *signal,
|
||||
hsa_amd_profiling_async_copy_time_t &time) = 0;
|
||||
|
||||
// @brief Translate timestamp agent domain to host domain.
|
||||
//
|
||||
// @param [in] tick Timestamp in agent domain.
|
||||
virtual uint64_t TranslateTime(uint64_t tick) = 0;
|
||||
// @brief Translate timestamp agent domain to host domain.
|
||||
//
|
||||
// @param [in] tick Timestamp in agent domain.
|
||||
virtual uint64_t TranslateTime(uint64_t tick) = 0;
|
||||
|
||||
// @brief Invalidate caches on the agent which may hold code object data.
|
||||
virtual void InvalidateCodeCaches() = 0;
|
||||
// @brief Invalidate caches on the agent which may hold code object data.
|
||||
virtual void InvalidateCodeCaches() = 0;
|
||||
|
||||
// @brief Sets the coherency type of this agent.
|
||||
//
|
||||
// @param [in] type New coherency type.
|
||||
//
|
||||
// @retval true The new coherency type is set successfully.
|
||||
virtual bool current_coherency_type(hsa_amd_coherency_type_t type) = 0;
|
||||
// @brief Sets the coherency type of this agent.
|
||||
//
|
||||
// @param [in] type New coherency type.
|
||||
//
|
||||
// @retval true The new coherency type is set successfully.
|
||||
virtual bool current_coherency_type(hsa_amd_coherency_type_t type) = 0;
|
||||
|
||||
// @brief Returns the current coherency type of this agent.
|
||||
//
|
||||
// @retval Coherency type.
|
||||
virtual hsa_amd_coherency_type_t current_coherency_type() const = 0;
|
||||
// @brief Returns the current coherency type of this agent.
|
||||
//
|
||||
// @retval Coherency type.
|
||||
virtual hsa_amd_coherency_type_t current_coherency_type() const = 0;
|
||||
|
||||
virtual void RegisterGangPeer(core::Agent& gang_peer, unsigned int bandwidth_factor) = 0;
|
||||
virtual void RegisterGangPeer(core::Agent &gang_peer,
|
||||
unsigned int bandwidth_factor) = 0;
|
||||
|
||||
virtual void RegisterRecSdmaEngIdMaskPeer(core::Agent& gang_peer, uint32_t rec_sdma_eng_id_mask) = 0;
|
||||
virtual void RegisterRecSdmaEngIdMaskPeer(core::Agent &gang_peer,
|
||||
uint32_t rec_sdma_eng_id_mask) = 0;
|
||||
|
||||
// @brief Query if agent represent Kaveri GPU.
|
||||
//
|
||||
// @retval true if agent is Kaveri GPU.
|
||||
virtual bool is_kv_device() const = 0;
|
||||
// @brief Query if agent represent Kaveri GPU.
|
||||
//
|
||||
// @retval true if agent is Kaveri GPU.
|
||||
virtual bool is_kv_device() const = 0;
|
||||
|
||||
// @brief Query the agent HSA profile.
|
||||
//
|
||||
// @retval HSA profile.
|
||||
virtual hsa_profile_t profile() const = 0;
|
||||
// @brief Query the agent HSA profile.
|
||||
//
|
||||
// @retval HSA profile.
|
||||
virtual hsa_profile_t profile() const = 0;
|
||||
|
||||
// @brief Query the agent memory bus width in bit.
|
||||
//
|
||||
// @retval Bus width in bit.
|
||||
virtual uint32_t memory_bus_width() const = 0;
|
||||
// @brief Query the agent memory bus width in bit.
|
||||
//
|
||||
// @retval Bus width in bit.
|
||||
virtual uint32_t memory_bus_width() const = 0;
|
||||
|
||||
// @brief Query the agent memory maximum frequency in MHz.
|
||||
//
|
||||
// @retval Maximum memory frequency in MHz.
|
||||
virtual uint32_t memory_max_frequency() const = 0;
|
||||
// @brief Query the agent memory maximum frequency in MHz.
|
||||
//
|
||||
// @retval Maximum memory frequency in MHz.
|
||||
virtual uint32_t memory_max_frequency() const = 0;
|
||||
|
||||
// @brief Whether agent supports asynchronous scratch reclaim. Depends on CP FW
|
||||
virtual bool AsyncScratchReclaimEnabled() const = 0;
|
||||
// @brief Whether agent supports asynchronous scratch reclaim. Depends on CP
|
||||
// FW
|
||||
virtual bool AsyncScratchReclaimEnabled() const = 0;
|
||||
|
||||
// @brief Update the agent's scratch use-once threshold.
|
||||
// Only valid when async scratch reclaim is supported
|
||||
// @retval HSA_STATUS_SUCCESS if successful
|
||||
virtual hsa_status_t SetAsyncScratchThresholds(size_t use_once_limit) = 0;
|
||||
// @brief Update the agent's scratch use-once threshold.
|
||||
// Only valid when async scratch reclaim is supported
|
||||
// @retval HSA_STATUS_SUCCESS if successful
|
||||
virtual hsa_status_t SetAsyncScratchThresholds(size_t use_once_limit) = 0;
|
||||
|
||||
// @brief Iterate through supported PC Sampling configurations
|
||||
// @retval HSA_STATUS_SUCCESS if successful
|
||||
virtual hsa_status_t PcSamplingIterateConfig(hsa_ven_amd_pcs_iterate_configuration_callback_t cb,
|
||||
void* cb_data) = 0;
|
||||
// @brief Iterate through supported PC Sampling configurations
|
||||
// @retval HSA_STATUS_SUCCESS if successful
|
||||
virtual hsa_status_t
|
||||
PcSamplingIterateConfig(hsa_ven_amd_pcs_iterate_configuration_callback_t cb,
|
||||
void *cb_data) = 0;
|
||||
|
||||
virtual hsa_status_t PcSamplingCreate(pcs::PcsRuntime::PcSamplingSession& session) = 0;
|
||||
virtual hsa_status_t
|
||||
PcSamplingCreate(pcs::PcsRuntime::PcSamplingSession &session) = 0;
|
||||
|
||||
virtual hsa_status_t PcSamplingCreateFromId(HsaPcSamplingTraceId pcsId,
|
||||
pcs::PcsRuntime::PcSamplingSession& session) = 0;
|
||||
virtual hsa_status_t
|
||||
PcSamplingCreateFromId(HsaPcSamplingTraceId pcsId,
|
||||
pcs::PcsRuntime::PcSamplingSession &session) = 0;
|
||||
|
||||
virtual hsa_status_t PcSamplingDestroy(pcs::PcsRuntime::PcSamplingSession& session) = 0;
|
||||
virtual hsa_status_t
|
||||
PcSamplingDestroy(pcs::PcsRuntime::PcSamplingSession &session) = 0;
|
||||
|
||||
virtual hsa_status_t PcSamplingStart(pcs::PcsRuntime::PcSamplingSession& session) = 0;
|
||||
virtual hsa_status_t
|
||||
PcSamplingStart(pcs::PcsRuntime::PcSamplingSession &session) = 0;
|
||||
|
||||
virtual hsa_status_t PcSamplingStop(pcs::PcsRuntime::PcSamplingSession& session) = 0;
|
||||
virtual hsa_status_t
|
||||
PcSamplingStop(pcs::PcsRuntime::PcSamplingSession &session) = 0;
|
||||
|
||||
virtual hsa_status_t PcSamplingFlush(pcs::PcsRuntime::PcSamplingSession& session) = 0;
|
||||
virtual hsa_status_t
|
||||
PcSamplingFlush(pcs::PcsRuntime::PcSamplingSession &session) = 0;
|
||||
};
|
||||
|
||||
class GpuAgent : public GpuAgentInt {
|
||||
|
||||
@@ -43,11 +43,21 @@
|
||||
#ifndef HSA_RUNTIME_CORE_INC_AMD_KFD_DRIVER_H_
|
||||
#define HSA_RUNTIME_CORE_INC_AMD_KFD_DRIVER_H_
|
||||
|
||||
#include "core/inc/driver.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "hsakmt/hsakmt.h"
|
||||
|
||||
#include "core/inc/driver.h"
|
||||
#include "core/inc/memory_region.h"
|
||||
|
||||
namespace rocr {
|
||||
|
||||
namespace core {
|
||||
|
||||
class Queue;
|
||||
|
||||
}
|
||||
|
||||
namespace AMD {
|
||||
|
||||
class KfdDriver : public core::Driver {
|
||||
@@ -57,13 +67,33 @@ public:
|
||||
static hsa_status_t DiscoverDriver();
|
||||
|
||||
hsa_status_t QueryKernelModeDriver(core::DriverQuery query) override;
|
||||
hsa_status_t GetMemoryProperties(uint32_t node_id,
|
||||
core::MemProperties &mprops) const override;
|
||||
hsa_status_t AllocateMemory(void **mem, size_t size, uint32_t node_id,
|
||||
core::MemFlags flags) override;
|
||||
hsa_status_t FreeMemory(void *mem, uint32_t node_id) override;
|
||||
hsa_status_t
|
||||
GetMemoryProperties(uint32_t node_id,
|
||||
core::MemoryRegion &mem_region) const override;
|
||||
hsa_status_t AllocateMemory(const core::MemoryRegion &mem_region,
|
||||
core::MemoryRegion::AllocateFlags alloc_flags,
|
||||
void **mem, size_t size,
|
||||
uint32_t node_id) override;
|
||||
hsa_status_t FreeMemory(void *mem, size_t size) override;
|
||||
hsa_status_t CreateQueue(core::Queue &queue) override;
|
||||
hsa_status_t DestroyQueue(core::Queue &queue) const override;
|
||||
|
||||
private:
|
||||
/// @brief Allocate agent accessible memory (system / local memory).
|
||||
static void *AllocateKfdMemory(const HsaMemFlags &flags, uint32_t node_id,
|
||||
size_t size);
|
||||
|
||||
/// @brief Free agent accessible memory (system / local memory).
|
||||
static bool FreeKfdMemory(void *mem, size_t size);
|
||||
|
||||
/// @brief Pin memory.
|
||||
static bool MakeKfdMemoryResident(size_t num_node, const uint32_t *nodes,
|
||||
const void *mem, size_t size,
|
||||
uint64_t *alternate_va,
|
||||
HsaMemMapFlags map_flag);
|
||||
|
||||
/// @brief Unpin memory.
|
||||
static void MakeKfdMemoryUnresident(const void *mem);
|
||||
};
|
||||
|
||||
} // namespace AMD
|
||||
|
||||
@@ -77,13 +77,6 @@ class MemoryRegion : public core::MemoryRegion {
|
||||
return reinterpret_cast<MemoryRegion*>(region.handle);
|
||||
}
|
||||
|
||||
/// @brief Allocate agent accessible memory (system / local memory).
|
||||
static void* AllocateKfdMemory(const HsaMemFlags& flag, HSAuint32 node_id,
|
||||
size_t size);
|
||||
|
||||
/// @brief Free agent accessible memory (system / local memory).
|
||||
static bool FreeKfdMemory(void* ptr, size_t size);
|
||||
|
||||
static bool RegisterMemory(void* ptr, size_t size, const HsaMemFlags& MemFlags);
|
||||
|
||||
static void DeregisterMemory(void* ptr);
|
||||
@@ -175,7 +168,15 @@ class MemoryRegion : public core::MemoryRegion {
|
||||
|
||||
__forceinline size_t GetPageSize() const { return kPageSize_; }
|
||||
|
||||
private:
|
||||
__forceinline const HsaMemFlags &mem_flags() const { return mem_flag_; }
|
||||
__forceinline const HsaMemMapFlags &map_flags() const { return map_flag_; }
|
||||
|
||||
void *fragment_alloc(size_t size) const {
|
||||
return fragment_allocator_.alloc(size);
|
||||
}
|
||||
bool fragment_free(void *mem) const { return fragment_allocator_.free(mem); }
|
||||
|
||||
private:
|
||||
const HsaMemoryProperties mem_props_;
|
||||
|
||||
HsaMemFlags mem_flag_;
|
||||
|
||||
@@ -45,8 +45,13 @@
|
||||
#include <memory>
|
||||
|
||||
#include "core/inc/driver.h"
|
||||
#include "core/inc/memory_region.h"
|
||||
|
||||
namespace rocr {
|
||||
namespace core {
|
||||
class Queue;
|
||||
}
|
||||
|
||||
namespace AMD {
|
||||
|
||||
class XdnaDriver : public core::Driver {
|
||||
@@ -57,11 +62,14 @@ public:
|
||||
static hsa_status_t DiscoverDriver();
|
||||
hsa_status_t QueryKernelModeDriver(core::DriverQuery query) override;
|
||||
|
||||
hsa_status_t GetMemoryProperties(uint32_t node_id,
|
||||
core::MemProperties &mprops) const override;
|
||||
hsa_status_t AllocateMemory(void **mem, size_t size, uint32_t node_id,
|
||||
core::MemFlags flags) override;
|
||||
hsa_status_t FreeMemory(void *mem, uint32_t node_id) override;
|
||||
hsa_status_t
|
||||
GetMemoryProperties(uint32_t node_id,
|
||||
core::MemoryRegion &mem_region) const override;
|
||||
hsa_status_t AllocateMemory(const core::MemoryRegion &mem_region,
|
||||
core::MemoryRegion::AllocateFlags alloc_flags,
|
||||
void **mem, size_t size,
|
||||
uint32_t node_id) override;
|
||||
hsa_status_t FreeMemory(void *mem, size_t size) override;
|
||||
hsa_status_t CreateQueue(core::Queue &queue) override;
|
||||
hsa_status_t DestroyQueue(core::Queue &queue) const override;
|
||||
|
||||
|
||||
@@ -46,20 +46,13 @@
|
||||
#include <limits>
|
||||
#include <string>
|
||||
|
||||
#include "core/inc/agent.h"
|
||||
#include "core/inc/memory_region.h"
|
||||
#include "inc/hsa.h"
|
||||
|
||||
namespace rocr {
|
||||
namespace core {
|
||||
|
||||
using MemFlags = uint32_t;
|
||||
|
||||
struct MemProperties {
|
||||
MemFlags flags_;
|
||||
size_t size_bytes_;
|
||||
uint64_t virtual_base_addr_;
|
||||
};
|
||||
class Queue;
|
||||
|
||||
struct DriverVersionInfo {
|
||||
uint32_t major;
|
||||
@@ -85,17 +78,27 @@ class Driver {
|
||||
/// @retval HSA_STATUS_SUCCESS if the kernel-mode driver query was
|
||||
/// successful.
|
||||
virtual hsa_status_t QueryKernelModeDriver(DriverQuery query) = 0;
|
||||
|
||||
/// @brief Open a connection to the driver using name_.
|
||||
/// @retval HSA_STATUS_SUCCESS if the driver was opened successfully.
|
||||
hsa_status_t Open();
|
||||
|
||||
/// @brief Close a connection to the open driver using fd_.
|
||||
/// @retval HSA_STATUS_SUCCESS if the driver was closed successfully.
|
||||
hsa_status_t Close();
|
||||
|
||||
/// @brief Get driver version information.
|
||||
/// @retval DriverVersionInfo containing the driver's version information.
|
||||
DriverVersionInfo Version() const { return version_; }
|
||||
const DriverVersionInfo &Version() const { return version_; }
|
||||
|
||||
virtual hsa_status_t GetMemoryProperties(uint32_t node_id, MemProperties &mprops) const = 0;
|
||||
/// @brief Get the memory properties of a specific node.
|
||||
/// @param node_id Node ID of the agent
|
||||
/// @param[in, out] mem_region MemoryRegion object whose properties will be
|
||||
/// retrieved.
|
||||
/// @retval HSA_STATUS_SUCCESS if the driver successfully returns the node's
|
||||
/// memory properties.
|
||||
virtual hsa_status_t GetMemoryProperties(uint32_t node_id,
|
||||
MemoryRegion &mem_region) const = 0;
|
||||
|
||||
/// @brief Allocate agent-accessible memory (system or agent-local memory).
|
||||
///
|
||||
@@ -103,10 +106,12 @@ class Driver {
|
||||
///
|
||||
/// @retval HSA_STATUS_SUCCESS if memory was successfully allocated or
|
||||
/// hsa_status_t error code if the memory allocation failed.
|
||||
virtual hsa_status_t AllocateMemory(void** mem, size_t size, uint32_t node_id,
|
||||
MemFlags flags) = 0;
|
||||
virtual hsa_status_t AllocateMemory(const MemoryRegion &mem_region,
|
||||
MemoryRegion::AllocateFlags alloc_flags,
|
||||
void **mem, size_t size,
|
||||
uint32_t node_id) = 0;
|
||||
|
||||
virtual hsa_status_t FreeMemory(void* mem, uint32_t node_id) = 0;
|
||||
virtual hsa_status_t FreeMemory(void *mem, size_t size) = 0;
|
||||
|
||||
virtual hsa_status_t CreateQueue(Queue &queue) = 0;
|
||||
|
||||
|
||||
@@ -43,12 +43,14 @@
|
||||
#include "core/inc/amd_aie_agent.h"
|
||||
|
||||
#include "core/inc/amd_aie_aql_queue.h"
|
||||
#include "core/inc/driver.h"
|
||||
|
||||
namespace rocr {
|
||||
namespace AMD {
|
||||
|
||||
AieAgent::AieAgent(uint32_t node)
|
||||
: core::Agent(node, core::Agent::DeviceType::kAmdAieDevice),
|
||||
: core::Agent(core::DriverType::XDNA, node,
|
||||
core::Agent::DeviceType::kAmdAieDevice),
|
||||
max_queues_(core::Runtime::runtime_singleton_->flag().max_queues()) {
|
||||
InitRegionList();
|
||||
}
|
||||
|
||||
@@ -47,14 +47,16 @@
|
||||
#include <thread>
|
||||
|
||||
#include "core/inc/amd_memory_region.h"
|
||||
#include "core/inc/driver.h"
|
||||
#include "core/inc/host_queue.h"
|
||||
|
||||
#include "inc/hsa_ext_image.h"
|
||||
|
||||
namespace rocr {
|
||||
namespace AMD {
|
||||
CpuAgent::CpuAgent(HSAuint32 node, const HsaNodeProperties& node_props)
|
||||
: core::Agent(node, kAmdCpuDevice), properties_(node_props) {
|
||||
CpuAgent::CpuAgent(HSAuint32 node, const HsaNodeProperties &node_props)
|
||||
: core::Agent(core::DriverType::KFD, node, kAmdCpuDevice),
|
||||
properties_(node_props) {
|
||||
InitRegionList();
|
||||
|
||||
InitCacheList();
|
||||
|
||||
@@ -59,25 +59,6 @@ namespace AMD {
|
||||
size_t MemoryRegion::max_sysmem_alloc_size_ = 0;
|
||||
size_t MemoryRegion::kPageSize_ = sysconf(_SC_PAGESIZE);
|
||||
|
||||
// Requests an allocation from hsaKmtAllocMemory, forwarding node_id, size and
// flag unchanged.
// Returns the pointer written by hsaKmtAllocMemory when it reports
// HSAKMT_STATUS_SUCCESS; any other status yields NULL.
void* MemoryRegion::AllocateKfdMemory(const HsaMemFlags& flag, HSAuint32 node_id, size_t size) {
|
||||
// Start from NULL; the value is only handed back on a success status.
void* ret = NULL;
|
||||
const HSAKMT_STATUS status = hsaKmtAllocMemory(node_id, size, flag, &ret);
|
||||
return (status == HSAKMT_STATUS_SUCCESS) ? ret : NULL;
|
||||
}
|
||||
|
||||
// Releases ptr through hsaKmtFreeMemory, passing size along.
// Returns true when hsaKmtFreeMemory reports HSAKMT_STATUS_SUCCESS, and also
// when ptr is NULL or size is 0 (the free call is skipped in that case).
// Returns false when hsaKmtFreeMemory reports any other status.
bool MemoryRegion::FreeKfdMemory(void* ptr, size_t size) {
|
||||
// Nothing to release: emit a debug message and report success, not an error.
if (ptr == NULL || size == 0) {
|
||||
debug_print("Invalid free ptr:%p size:%lu\n", ptr, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (hsaKmtFreeMemory(ptr, size) != HSAKMT_STATUS_SUCCESS) {
|
||||
debug_print("Failed to free ptr:%p size:%lu\n", ptr, size);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool MemoryRegion::RegisterMemory(void* ptr, size_t size, const HsaMemFlags& MemFlags) {
|
||||
assert(ptr != NULL);
|
||||
assert(size != 0);
|
||||
@@ -198,112 +179,8 @@ hsa_status_t MemoryRegion::AllocateImpl(size_t& size, AllocateFlags alloc_flags,
|
||||
|
||||
size = AlignUp(size, kPageSize_);
|
||||
|
||||
HsaMemFlags kmt_alloc_flags(mem_flag_);
|
||||
kmt_alloc_flags.ui32.ExecuteAccess =
|
||||
(alloc_flags & AllocateExecutable ? 1 : 0);
|
||||
kmt_alloc_flags.ui32.AQLQueueMemory =
|
||||
(alloc_flags & AllocateDoubleMap ? 1 : 0);
|
||||
if (IsSystem() && (alloc_flags & AllocateNonPaged))
|
||||
kmt_alloc_flags.ui32.NonPaged = 1;
|
||||
|
||||
// Allocating a memory handle for virtual memory
|
||||
kmt_alloc_flags.ui32.NoAddress = !!(alloc_flags & AllocateMemoryOnly);
|
||||
|
||||
// Allocate pseudo fine grain memory
|
||||
kmt_alloc_flags.ui32.CoarseGrain = (alloc_flags & AllocatePCIeRW ? 0 : kmt_alloc_flags.ui32.CoarseGrain);
|
||||
kmt_alloc_flags.ui32.NoSubstitute = (alloc_flags & AllocatePinned ? 1 : kmt_alloc_flags.ui32.NoSubstitute);
|
||||
|
||||
kmt_alloc_flags.ui32.GTTAccess = (alloc_flags & AllocateGTTAccess ? 1 : kmt_alloc_flags.ui32.GTTAccess);
|
||||
if (IsLocalMemory()) {
|
||||
// Allocate physically contiguous memory - AllocateKfdMemory function call will fail
|
||||
// if this flag is not supported in KFD.
|
||||
kmt_alloc_flags.ui32.Contiguous =
|
||||
(alloc_flags & AllocateContiguous ? 1 : kmt_alloc_flags.ui32.Contiguous);
|
||||
}
|
||||
|
||||
// Only allow using the suballocator for ordinary VRAM.
|
||||
if (IsLocalMemory() && !kmt_alloc_flags.ui32.NoAddress) {
|
||||
bool subAllocEnabled = !core::Runtime::runtime_singleton_->flag().disable_fragment_alloc();
|
||||
// Avoid modifying executable or queue allocations.
|
||||
bool useSubAlloc = subAllocEnabled;
|
||||
useSubAlloc &= ((alloc_flags & (~AllocateRestrict)) == 0);
|
||||
if (useSubAlloc) {
|
||||
*address = fragment_allocator_.alloc(size);
|
||||
|
||||
if ((alloc_flags & AllocateAsan) &&
|
||||
hsaKmtReplaceAsanHeaderPage(*address) != HSAKMT_STATUS_SUCCESS) {
|
||||
fragment_allocator_.free(*address);
|
||||
*address = NULL;
|
||||
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
||||
}
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
}
|
||||
|
||||
const HSAuint32 node_id = (alloc_flags & AllocateGTTAccess) ? agent_node_id : owner()->node_id();
|
||||
|
||||
// Allocate memory.
|
||||
// If it fails attempt to release memory from the block allocator and retry.
|
||||
*address = AllocateKfdMemory(kmt_alloc_flags, node_id, size);
|
||||
if (*address == nullptr) {
|
||||
owner()->Trim();
|
||||
*address = AllocateKfdMemory(kmt_alloc_flags, node_id, size);
|
||||
}
|
||||
|
||||
if (*address != nullptr) {
|
||||
if (kmt_alloc_flags.ui32.NoAddress) return HSA_STATUS_SUCCESS;
|
||||
|
||||
// Commit the memory.
|
||||
// For system memory, on non-restricted allocation, map it to all GPUs. On
|
||||
// restricted allocation, only CPU is allowed to access by default, so
|
||||
// no need to map
|
||||
// For local memory, only map it to the owning GPU. Mapping to other GPU,
|
||||
// if the access is allowed, is performed on AllowAccess.
|
||||
HsaMemMapFlags map_flag = map_flag_;
|
||||
size_t map_node_count = 1;
|
||||
const uint32_t owner_node_id = owner()->node_id();
|
||||
const uint32_t* map_node_id = &owner_node_id;
|
||||
|
||||
if (IsSystem()) {
|
||||
if ((alloc_flags & AllocateRestrict) == 0) {
|
||||
// Map to all GPU agents.
|
||||
map_node_count = core::Runtime::runtime_singleton_->gpu_ids().size();
|
||||
|
||||
if (map_node_count == 0) {
|
||||
// No need to pin since no GPU in the platform.
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
map_node_id = &core::Runtime::runtime_singleton_->gpu_ids()[0];
|
||||
} else {
|
||||
// No need to pin it for CPU exclusive access.
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t alternate_va = 0;
|
||||
const bool is_resident = MakeKfdMemoryResident(
|
||||
map_node_count, map_node_id, *address, size, &alternate_va, map_flag);
|
||||
|
||||
const bool require_pinning =
|
||||
(!full_profile() || IsLocalMemory() || IsScratch());
|
||||
|
||||
if (require_pinning && !is_resident) {
|
||||
FreeKfdMemory(*address, size);
|
||||
*address = NULL;
|
||||
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
||||
}
|
||||
|
||||
if ((alloc_flags & AllocateAsan) &&
|
||||
hsaKmtReplaceAsanHeaderPage(*address) != HSAKMT_STATUS_SUCCESS) {
|
||||
FreeKfdMemory(*address, size);
|
||||
*address = NULL;
|
||||
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
||||
}
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
||||
return core::Runtime::runtime_singleton_->AgentDriver(owner()->driver_type)
|
||||
.AllocateMemory(*this, alloc_flags, address, size, agent_node_id);
|
||||
}
|
||||
|
||||
hsa_status_t MemoryRegion::Free(void* address, size_t size) const {
|
||||
@@ -314,9 +191,8 @@ hsa_status_t MemoryRegion::Free(void* address, size_t size) const {
|
||||
hsa_status_t MemoryRegion::FreeImpl(void* address, size_t size) const {
|
||||
if (fragment_allocator_.free(address)) return HSA_STATUS_SUCCESS;
|
||||
|
||||
MakeKfdMemoryUnresident(address);
|
||||
|
||||
return FreeKfdMemory(address, size) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
|
||||
return core::Runtime::runtime_singleton_->AgentDriver(owner()->driver_type)
|
||||
.FreeMemory(address, size);
|
||||
}
|
||||
|
||||
// TODO: Look into a better name and/or making this process transparent to exporting.
|
||||
|
||||
Ссылка в новой задаче
Block a user