SWDEV-311270 - Add IPC support for memory pools
Initial implementation for hipMemPoolExportToShareableHandle,
hipMemPoolImportFromShareableHandle,
hipMemPoolExportPointer and hipMemPoolImportPointer
Change-Id: I0ebdc48e9163b394ded560adca6c38bbc5aee7d1
[ROCm/clr commit: 1a0c3e4dc4]
Этот коммит содержится в:
коммит произвёл
German Andryeyev
родитель
d6086f9d69
Коммит
af5944dc71
@@ -236,7 +236,7 @@ hipError_t hipMemPoolCreate(hipMemPool_t* mem_pool, const hipMemPoolProps* pool_
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
auto device = g_devices[pool_props->location.id];
|
||||
auto pool = new hip::MemoryPool(device);
|
||||
auto pool = new hip::MemoryPool(device, pool_props->handleTypes != hipMemHandleTypeNone);
|
||||
if (pool == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
@@ -298,7 +298,15 @@ hipError_t hipMemPoolExportToShareableHandle(
|
||||
if (mem_pool == nullptr || shared_handle == nullptr || flags == -1) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
HIP_RETURN(hipErrorNotSupported);
|
||||
|
||||
auto mpool = reinterpret_cast<hip::MemoryPool*>(mem_pool);
|
||||
auto handle = mpool->Export();
|
||||
if (!handle) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
*reinterpret_cast<amd::Os::FileDesc*>(shared_handle) = handle;
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
@@ -311,7 +319,26 @@ hipError_t hipMemPoolImportFromShareableHandle(
|
||||
if (mem_pool == nullptr || shared_handle == nullptr || flags == -1) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
HIP_RETURN(hipErrorNotSupported);
|
||||
|
||||
auto device = g_devices[0];
|
||||
auto pool = new hip::MemoryPool(device);
|
||||
if (pool == nullptr) {
|
||||
HIP_RETURN(hipErrorOutOfMemory);
|
||||
}
|
||||
// Note: The interface casts the integer value of file handle under Linux into void*,
|
||||
// but compiler may not allow to cast it back. Hence, make a cast with a union...
|
||||
union {
|
||||
amd::Os::FileDesc desc;
|
||||
void* ptr;
|
||||
} handle;
|
||||
handle.ptr = shared_handle;
|
||||
if (!pool->Import(handle.desc)) {
|
||||
pool->release();
|
||||
HIP_RETURN(hipErrorOutOfMemory);
|
||||
}
|
||||
*mem_pool = reinterpret_cast<hipMemPool_t>(pool);
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
@@ -320,7 +347,22 @@ hipError_t hipMemPoolExportPointer(hipMemPoolPtrExportData* export_data, void* p
|
||||
if (export_data == nullptr || ptr == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
HIP_RETURN(hipErrorNotSupported);
|
||||
|
||||
size_t offset = 0;
|
||||
auto memory = getMemoryObject(ptr, offset);
|
||||
if (memory != nullptr) {
|
||||
auto id = memory->getUserData().deviceId;
|
||||
// Note: export_data must point to 64 bytes of shared memory
|
||||
auto shared = reinterpret_cast<hip::SharedMemPointer*>(export_data);
|
||||
|
||||
if (!g_devices[id]->devices()[0]->IpcCreate(ptr,
|
||||
&shared->size_, &shared->handle_[0], &shared->offset_)) {
|
||||
HIP_RETURN(hipErrorOutOfMemory);
|
||||
}
|
||||
} else {
|
||||
HIP_RETURN(hipErrorOutOfMemory);
|
||||
}
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
@@ -332,6 +374,15 @@ hipError_t hipMemPoolImportPointer(
|
||||
if (mem_pool == nullptr || export_data == nullptr || ptr == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
HIP_RETURN(hipErrorNotSupported);
|
||||
auto mpool = reinterpret_cast<hip::MemoryPool*>(mem_pool);
|
||||
auto shared = reinterpret_cast<hip::SharedMemPointer*>(export_data);
|
||||
if (!mpool->Device()->devices()[0]->IpcAttach(
|
||||
&shared->handle_[0], shared->size_, shared->offset_, 0, ptr)) {
|
||||
HIP_RETURN(hipErrorOutOfMemory);
|
||||
}
|
||||
size_t offset = 0;
|
||||
auto memory = getMemoryObject(*ptr, offset);
|
||||
mpool->AddBusyMemory(memory);
|
||||
mpool->retain();
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
@@ -168,8 +168,8 @@ void* MemoryPool::AllocateMemory(size_t size, hip::Stream* stream, void* dptr) {
|
||||
if (dev_info.maxMemAllocSize_ < size) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
dev_ptr = amd::SvmBuffer::malloc(*context, 0, size, dev_info.memBaseAddrAlign_, nullptr);
|
||||
cl_svm_mem_flags flags = (state_.interprocess_) ? ROCCLR_MEM_INTERPROCESS : 0;
|
||||
dev_ptr = amd::SvmBuffer::malloc(*context, flags, size, dev_info.memBaseAddrAlign_, nullptr);
|
||||
if (dev_ptr == nullptr) {
|
||||
size_t free = 0, total =0;
|
||||
hipError_t err = hipMemGetInfo(&free, &total);
|
||||
@@ -425,10 +425,56 @@ void MemoryPool::GetAccess(hip::Device* device, hipMemAccessFlags* flags) {
|
||||
}
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
/// Frees every allocation that is still tracked by the busy heap.
/// The heap is re-queried on each pass; the loop ends once it reports no allocations
/// (presumably FreeMemory() removes the entry it is given - confirm against Heap).
/// @param stream  Stream forwarded to FreeMemory() for each allocation
void MemoryPool::FreeAllMemory(hip::Stream* stream) {
  for (;;) {
    if (busy_heap_.Allocations().empty()) {
      break;
    }
    // Always take the first tracked allocation
    FreeMemory(busy_heap_.Allocations().begin()->first, stream);
  }
}
|
||||
|
||||
// ================================================================================================
|
||||
/// Exports the memory pool into an OS specific handle.
///
/// On the first call a block of IPC memory, sized for SharedMemPool, is created and filled
/// with the pool state and the per-device access list. Later calls return the handle that
/// was stored in that block.
///
/// @return The OS handle of the shared block, or a value-initialized (zero) handle if the
///         shared block couldn't be created.
amd::Os::FileDesc MemoryPool::Export() {
  amd::ScopedLock lock(lock_pool_ops_);
  // The pool was exported already, reuse the stored handle
  if (shared_ != nullptr) {
    return shared_->handle_;
  }

  constexpr uint32_t kFileNameSize = 20;
  char file_name[kFileNameSize];
  // Generate a unique name from the mempool pointer
  // Note: Windows can accept an unnamed allocation
  snprintf(file_name, kFileNameSize, "%p", this);

  amd::Os::FileDesc handle{};
  shared_ = reinterpret_cast<SharedMemPool*>(
      amd::Os::CreateIpcMemory(file_name, sizeof(SharedMemPool), &handle));
  if (shared_ == nullptr) {
    // Creation failed: drop whatever descriptor may have been produced and report the failure
    // with an empty handle, so the callers can detect it with a simple "!handle" check
    amd::Os::CloseIpcMemory(handle, nullptr, 0);
    return amd::Os::FileDesc{};
  }

  shared_->handle_ = handle;
  shared_->state_ = state_.value_;
  shared_->access_size_ = 0;
  memset(shared_->access_, 0, sizeof(shared_->access_));
  assert((access_map_.size() <= kMaxMgpuAccess) && "Can't support more GPU(s) in shared access");

  for (const auto& entry : access_map_) {
    // The assert above is compiled out in release builds, hence never write past the array
    if (shared_->access_size_ >= kMaxMgpuAccess) {
      break;
    }
    shared_->access_[shared_->access_size_] = SharedAccess{entry.first->deviceId(), entry.second};
    shared_->access_size_++;
  }

  return handle;
}
|
||||
|
||||
// ================================================================================================
|
||||
/// Imports the memory pool settings from an OS specific handle.
///
/// Maps the shared block behind @p handle, copies the pool state and the device access list
/// into this pool and then unmaps the block again (nothing keeps a pointer to it).
///
/// @param handle  OS handle of the shared block, created by Export() in the exporting process
/// @return True if the shared block could be mapped and the settings were copied
bool MemoryPool::Import(amd::Os::FileDesc handle) {
  amd::ScopedLock lock(lock_pool_ops_);
  auto shared = reinterpret_cast<SharedMemPool*>(
      amd::Os::OpenIpcMemory(nullptr, handle, sizeof(SharedMemPool)));
  if (shared == nullptr) {
    return false;
  }

  state_.value_ = shared->state_;

  // The block lives in shared memory and another process controls its content.
  // Read the counter once and clamp it to the capacity of the access array.
  const uint32_t stored_count = shared->access_size_;
  const uint32_t count = (stored_count < kMaxMgpuAccess) ? stored_count : kMaxMgpuAccess;
  for (uint32_t i = 0; i < count; ++i) {
    const SharedAccess access = shared->access_[i];
    // Skip the entries, which don't name a device of the current process
    if ((access.device_id_ < 0) ||
        (static_cast<size_t>(access.device_id_) >= g_devices.size())) {
      continue;
    }
    access_map_[g_devices[access.device_id_]] = access.flags_;
  }

  // Everything was copied. Unmap the view, but leave the handle alone, the app owns it
  amd::Os::CloseIpcMemory(0, shared, sizeof(SharedMemPool));
  return true;
}
|
||||
}
|
||||
|
||||
@@ -31,6 +31,12 @@ namespace hip {
|
||||
class Device;
|
||||
class Stream;
|
||||
|
||||
struct SharedMemPointer {
|
||||
size_t offset_;
|
||||
size_t size_;
|
||||
char handle_[IHIP_IPC_MEM_HANDLE_SIZE];
|
||||
};
|
||||
|
||||
struct MemoryTimestamp {
|
||||
MemoryTimestamp(hip::Stream* stream, hip::Event* event = nullptr): event_(event) {
|
||||
if (stream != nullptr) {
|
||||
@@ -160,16 +166,34 @@ private:
|
||||
/// hipMemPoolReuseAllowOpportunistic option will validate if HIP event,
|
||||
/// associated with memory is done, then reuse can be performed.
|
||||
class MemoryPool : public amd::ReferenceCountedObject {
|
||||
public:
|
||||
MemoryPool(hip::Device* device):
|
||||
busy_heap_(device),
|
||||
free_heap_(device),
|
||||
lock_pool_ops_("Pool operations", true), device_(device) {
|
||||
device_->AddMemoryPool(this);
|
||||
state_.event_dependencies_ = 1;
|
||||
state_.opportunistic_ = 1;
|
||||
state_.internal_dependencies_ = 1;
|
||||
}
|
||||
public:
|
||||
struct SharedAccess {
|
||||
int device_id_; //!< Device ID for access with a specified shared resource
|
||||
hipMemAccessFlags flags_; //!< Flags which define access type
|
||||
};
|
||||
|
||||
static constexpr uint32_t kMaxMgpuAccess = 32;
|
||||
struct SharedMemPool {
|
||||
amd::Os::FileDesc handle_; //!< File descriptor for shared memory
|
||||
uint32_t state_; //!< Memory pool state
|
||||
uint32_t access_size_; //!< The number of entries in access array
|
||||
SharedAccess access_[kMaxMgpuAccess]; //!< The list of devices for access
|
||||
};
|
||||
|
||||
MemoryPool(hip::Device* device, bool interprocess = false)
|
||||
: busy_heap_(device),
|
||||
free_heap_(device),
|
||||
lock_pool_ops_("Pool operations", true),
|
||||
device_(device),
|
||||
shared_(nullptr) {
|
||||
device_->AddMemoryPool(this);
|
||||
state_.value_ = 0;
|
||||
state_.event_dependencies_ = 1;
|
||||
state_.opportunistic_ = 1;
|
||||
state_.internal_dependencies_ = 1;
|
||||
state_.interprocess_ = interprocess;
|
||||
}
|
||||
|
||||
virtual ~MemoryPool() {
|
||||
if (!busy_heap_.IsEmpty()) {
|
||||
LogError("Shouldn't destroy pool with busy allocations!");
|
||||
@@ -177,6 +201,10 @@ public:
|
||||
ReleaseAllMemory();
|
||||
// Remove memory pool from the list of all pool on the current device
|
||||
device_->RemoveMemoryPool(this);
|
||||
if (shared_ != nullptr) {
|
||||
// Note: The app supposes to close the handle... Double close in Windows will cause a crash
|
||||
amd::Os::CloseIpcMemory(0, shared_, sizeof(SharedMemPool));
|
||||
}
|
||||
}
|
||||
|
||||
/// The same stream can reuse memory without HIP event validation
|
||||
@@ -186,9 +214,7 @@ public:
|
||||
bool FreeMemory(amd::Memory* memory, hip::Stream* stream);
|
||||
|
||||
/// Check if memory is active and belongs to the busy heap
|
||||
bool IsBusyMemory(amd::Memory* memory) const {
|
||||
return busy_heap_.IsActiveMemory(memory);
|
||||
}
|
||||
bool IsBusyMemory(amd::Memory* memory) const { return busy_heap_.IsActiveMemory(memory); }
|
||||
|
||||
/// Releases all allocations from free_heap_. It can be called on Stream or Device synchronization
|
||||
/// @note The caller must make sure it's safe to release memory
|
||||
@@ -200,6 +226,10 @@ public:
|
||||
/// Releases all allocations in MemoryPool
|
||||
void ReleaseAllMemory();
|
||||
|
||||
/// Place the allocated memory into the busy heap
|
||||
void AddBusyMemory(amd::Memory* memory) {
|
||||
busy_heap_.AddMemory(memory, nullptr);
|
||||
}
|
||||
/// Trims the pool until it has only min_bytes_to_hold
|
||||
void TrimTo(size_t min_bytes_to_hold);
|
||||
|
||||
@@ -221,6 +251,12 @@ public:
|
||||
/// Frees all busy memory
|
||||
void FreeAllMemory(hip::Stream* stream = nullptr);
|
||||
|
||||
/// Exports memory pool into an OS specific handle
|
||||
amd::Os::FileDesc Export();
|
||||
|
||||
/// Imports memory pool from an OS specific handle
|
||||
bool Import(amd::Os::FileDesc handle);
|
||||
|
||||
/// Accessors for the pool state
|
||||
bool EventDependencies() const { return (state_.event_dependencies_) ? true : false; }
|
||||
bool Opportunistic() const { return (state_.opportunistic_) ? true : false; }
|
||||
@@ -233,15 +269,22 @@ private:
|
||||
|
||||
Heap busy_heap_; //!< Heap of busy allocations
|
||||
Heap free_heap_; //!< Heap of freed allocations
|
||||
struct {
|
||||
uint32_t event_dependencies_ : 1; //!< Event dependencies tracking is enabled
|
||||
uint32_t opportunistic_ : 1; //!< HIP event check is enabled
|
||||
uint32_t internal_dependencies_ : 1; //!< Runtime adds internal events to handle memory dependencies
|
||||
union {
|
||||
struct {
|
||||
uint32_t event_dependencies_ : 1; //!< Event dependencies tracking is enabled
|
||||
uint32_t opportunistic_ : 1; //!< HIP event check is enabled
|
||||
uint32_t internal_dependencies_ : 1; //!< Runtime adds internal events to handle memory
|
||||
//!< dependencies
|
||||
uint32_t interprocess_ : 1; //!< Memory pool can be used in interprocess communications
|
||||
};
|
||||
uint32_t value_;
|
||||
} state_;
|
||||
|
||||
amd::Monitor lock_pool_ops_; //!< Access to the pool must be lock protected
|
||||
std::map<hip::Device*, hipMemAccessFlags> access_map_; //!< Map of access to the pool from devices
|
||||
hip::Device* device_; //!< Hip device the heap will reside
|
||||
SharedMemPool* shared_; //!< Pointer to shared memory for IPC
|
||||
};
|
||||
|
||||
|
||||
} // namespace hip
|
||||
|
||||
@@ -332,7 +332,7 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp,
|
||||
memcpy(info_.uuid_ + 4, &palProp.pciProperties.busNumber, sizeof(uint32_t));
|
||||
memcpy(info_.uuid_ + 8, &palProp.pciProperties.deviceNumber, sizeof(uint32_t));
|
||||
memcpy(info_.uuid_ + 12, &palProp.pciProperties.functionNumber, sizeof(uint32_t));
|
||||
|
||||
|
||||
info_.maxWorkItemDimensions_ = 3;
|
||||
|
||||
info_.maxComputeUnits_ = settings().enableWgpMode_
|
||||
@@ -1571,7 +1571,10 @@ pal::Memory* Device::createBuffer(amd::Memory& owner, bool directAccess) const {
|
||||
type = Resource::P2PAccess;
|
||||
}
|
||||
}
|
||||
|
||||
params.interprocess_ = (owner.getMemFlags() & ROCCLR_MEM_INTERPROCESS) ? true : false;
|
||||
if (owner.ipcShared()) {
|
||||
type = Resource::IpcMemory;
|
||||
}
|
||||
// Create memory object
|
||||
result = gpuMemory->create(type, ¶ms);
|
||||
|
||||
@@ -2342,6 +2345,116 @@ void Device::virtualFree(void* addr) {
|
||||
}
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
bool Device::IpcCreate(void* dev_ptr, size_t* mem_size, void* handle, size_t* mem_offset) const {
|
||||
hsa_status_t hsa_status = HSA_STATUS_SUCCESS;
|
||||
|
||||
amd::Memory* amd_mem_obj = amd::MemObjMap::FindMemObj(dev_ptr);
|
||||
if (amd_mem_obj == nullptr) {
|
||||
DevLogPrintfError("Cannot retrieve amd_mem_obj for dev_ptr: 0x%x", dev_ptr);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Get the original pointer from the amd::Memory object
|
||||
void* orig_dev_ptr = nullptr;
|
||||
if (amd_mem_obj->getSvmPtr() != nullptr) {
|
||||
orig_dev_ptr = amd_mem_obj->getSvmPtr();
|
||||
} else if (amd_mem_obj->getHostMem() != nullptr) {
|
||||
orig_dev_ptr = amd_mem_obj->getHostMem();
|
||||
} else {
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
|
||||
// Check if the dev_ptr is lesser than original dev_ptr
|
||||
if (orig_dev_ptr > dev_ptr) {
|
||||
// If this happens, then revisit FindMemObj logic
|
||||
DevLogPrintfError("Original dev_ptr: 0x%x cannot be greater than dev_ptr: 0x%x", orig_dev_ptr,
|
||||
dev_ptr);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Calculate the memory offset from the original base ptr
|
||||
*mem_offset = reinterpret_cast<address>(dev_ptr) - reinterpret_cast<address>(orig_dev_ptr);
|
||||
*mem_size = amd_mem_obj->getSize();
|
||||
|
||||
// Check if the dev_ptr is greater than memory allocated
|
||||
if (*mem_offset > *mem_size) {
|
||||
DevLogPrintfError(
|
||||
"Memory offset: %u cannot be greater than size of original memory allocated: %u", *mem_size,
|
||||
*mem_offset);
|
||||
return false;
|
||||
}
|
||||
auto dev_mem = getGpuMemory(amd_mem_obj);
|
||||
*reinterpret_cast<void**>(handle) = dev_mem->ExportHandle();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
bool Device::IpcAttach(const void* handle, size_t mem_size, size_t mem_offset, unsigned int flags,
|
||||
void** dev_ptr) const {
|
||||
amd::Memory* amd_mem_obj = nullptr;
|
||||
|
||||
// Note: ROCr path has a validation for duplicated IPC memory, but PAL currently can't
|
||||
// identify the duplicates
|
||||
|
||||
// Create an amd Memory object for the handle
|
||||
amd_mem_obj = new (context()) amd::IpcBuffer(context(), flags, mem_offset, mem_size,
|
||||
*reinterpret_cast<amd::Os::FileDesc*>(const_cast<void*>(handle)));
|
||||
if (amd_mem_obj == nullptr) {
|
||||
LogError("failed to create a mem object!");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!amd_mem_obj->create(nullptr)) {
|
||||
LogError("failed to create a svm hidden buffer!");
|
||||
amd_mem_obj->release();
|
||||
return false;
|
||||
}
|
||||
|
||||
// Add the original mem_ptr to the MemObjMap with newly created amd_mem_obj
|
||||
amd::MemObjMap::AddMemObj(amd_mem_obj->getSvmPtr(), amd_mem_obj);
|
||||
|
||||
// Make sure the mem_offset doesnt overflow the allocated memory
|
||||
guarantee((mem_offset < mem_size), "IPC mem offset greater than allocated size");
|
||||
|
||||
*dev_ptr = amd_mem_obj->getSvmPtr();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
bool Device::IpcDetach(void* dev_ptr) const {
|
||||
hsa_status_t hsa_status = HSA_STATUS_SUCCESS;
|
||||
|
||||
amd::Memory* amd_mem_obj = amd::MemObjMap::FindMemObj(dev_ptr);
|
||||
if (amd_mem_obj == nullptr) {
|
||||
DevLogPrintfError("Memory object for the ptr: 0x%x cannot be null \n", dev_ptr);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!amd_mem_obj->ipcShared()) {
|
||||
DevLogPrintfError("Memory object for the ptr: 0x%x is not ipcShared \n", dev_ptr);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Get the original pointer from the amd::Memory object
|
||||
void* orig_dev_ptr = nullptr;
|
||||
if (amd_mem_obj->getSvmPtr() != nullptr) {
|
||||
orig_dev_ptr = amd_mem_obj->getSvmPtr();
|
||||
} else if (amd_mem_obj->getHostMem() != nullptr) {
|
||||
orig_dev_ptr = amd_mem_obj->getHostMem();
|
||||
} else {
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
|
||||
if (amd_mem_obj->release() == 0) {
|
||||
amd::MemObjMap::RemoveMemObj(orig_dev_ptr);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
bool Device::AcquireExclusiveGpuAccess() {
|
||||
// Lock the virtual GPU list
|
||||
@@ -2508,8 +2621,7 @@ bool Device::createBlitProgram() {
|
||||
if (info().cooperativeGroups_) {
|
||||
extraBlits.append(GwsInitSourceCode);
|
||||
}
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
if (settings().oclVersion_ >= OpenCL20) {
|
||||
extraBlits = iDev()->GetDispatchKernelSource();
|
||||
if (settings().useLightning_) {
|
||||
@@ -2553,7 +2665,7 @@ bool Device::SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeI
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
// ================================================================================================
|
||||
bool Device::importExtSemaphore(void** extSemaphore, const amd::Os::FileDesc& handle,
|
||||
amd::ExternalSemaphoreHandleType sem_handle_type) {
|
||||
Pal::ExternalQueueSemaphoreOpenInfo palOpenInfo = {};
|
||||
|
||||
@@ -533,6 +533,15 @@ class Device : public NullDevice {
|
||||
virtual void* virtualAlloc(void* addr, size_t size, size_t alignment);
|
||||
virtual void virtualFree(void* addr);
|
||||
|
||||
//! Creates IPC memory handle from a provided SVM pointer
|
||||
virtual bool IpcCreate(void* dev_ptr, size_t* mem_size,
|
||||
void* handle, size_t* mem_offset) const override;
|
||||
//! Attach IPC memory to the current device
|
||||
virtual bool IpcAttach(const void* handle, size_t mem_size, size_t mem_offset, unsigned int flags,
|
||||
void** dev_ptr) const override;
|
||||
//! Detach IPC memory from the current device
|
||||
virtual bool IpcDetach(void* dev_ptr) const override;
|
||||
|
||||
//! Returns SRD manager object
|
||||
SrdManager& srds() const { return *srdManager_; }
|
||||
|
||||
|
||||
@@ -1027,6 +1027,22 @@ bool Resource::CreateInterop(CreateParams* params) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
//! Creates a PAL memory object from the IPC handle of the owner.
//!
//! \param params  Creation parameters; owner_ is treated as amd::IpcBuffer
//! \return True if PAL memory was opened; the owner's SVM pointer is then set to its GPU VA
bool Resource::CreateIpc(CreateParams* params) {
  Pal::ExternalGpuMemoryOpenInfo gpuMemOpenInfo = {};

  // Describe the external resource with the handle that is stored in the IPC buffer
  auto* ipcOwner = reinterpret_cast<amd::IpcBuffer*>(params->owner_);
  gpuMemOpenInfo.resourceInfo.hExternalResource = ipcOwner->Handle();
  gpuMemOpenInfo.resourceInfo.flags.ntHandle = false;

  memRef_ = GpuMemoryReference::Create(dev(), gpuMemOpenInfo);
  if (memRef_ == nullptr) {
    return false;
  }

  // Publish the GPU virtual address of the opened memory as the owner's SVM pointer
  void* svmPtr = reinterpret_cast<void*>(memRef_->iMem()->Desc().gpuVirtAddr);
  params->owner_->setSvmPtr(svmPtr);
  return true;
}
|
||||
|
||||
// ================================================================================================
|
||||
bool Resource::CreateP2PAccess(CreateParams* params) {
|
||||
if (params->owner_->asImage()) {
|
||||
@@ -1123,6 +1139,7 @@ bool Resource::CreateSvm(CreateParams* params, Pal::gpusize svmPtr) {
|
||||
createInfo.flags.useReservedGpuVa = false;
|
||||
createInfo.pReservedGpuVaOwner = nullptr;
|
||||
}
|
||||
createInfo.flags.interprocess = desc_.interprocess_;
|
||||
if (!dev().settings().svmFineGrainSystem_) {
|
||||
memRef_ = dev().resourceCache().findGpuMemory(&desc_, createInfo.size, createInfo.alignment,
|
||||
createInfo.pReservedGpuVaOwner, &subOffset_);
|
||||
@@ -1141,6 +1158,8 @@ bool Resource::CreateSvm(CreateParams* params, Pal::gpusize svmPtr) {
|
||||
createInfo.pReservedGpuVaOwner = params->svmBase_->iMem();
|
||||
}
|
||||
memTypeToHeap(&createInfo);
|
||||
createInfo.flags.interprocess = desc_.interprocess_;
|
||||
|
||||
memRef_ = dev().resourceCache().findGpuMemory(&desc_, createInfo.size, createInfo.alignment,
|
||||
createInfo.pReservedGpuVaOwner, &subOffset_);
|
||||
if (memRef_ == nullptr) {
|
||||
@@ -1210,6 +1229,8 @@ bool Resource::create(MemoryType memType, CreateParams* params, bool forceLinear
|
||||
if (dev().settings().disablePersistent_ && (memoryType() == Persistent)) {
|
||||
desc_.type_ = RemoteUSWC;
|
||||
}
|
||||
desc_.interprocess_ = (nullptr != params) ? params->interprocess_ : false;
|
||||
|
||||
switch (memoryType()) {
|
||||
case OGLInterop:
|
||||
case D3D9Interop:
|
||||
@@ -1242,6 +1263,8 @@ bool Resource::create(MemoryType memType, CreateParams* params, bool forceLinear
|
||||
}
|
||||
return true;
|
||||
}
|
||||
case IpcMemory:
|
||||
return CreateIpc(params);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@@ -1313,6 +1336,14 @@ bool Resource::create(MemoryType memType, CreateParams* params, bool forceLinear
|
||||
return true;
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
void* Resource::ExportHandle() const {
|
||||
Pal::GpuMemoryExportInfo exportInfo = {};
|
||||
// Set default flags in case they are not provided by application
|
||||
exportInfo.accessFlags = GENERIC_READ | GENERIC_WRITE;
|
||||
Pal::OsExternalHandle handle = iMem()->ExportExternalHandle(exportInfo);
|
||||
return reinterpret_cast<void*>(handle);
|
||||
}
|
||||
// ================================================================================================
|
||||
void Resource::free() {
|
||||
if (memRef_ == nullptr) {
|
||||
@@ -2246,7 +2277,8 @@ GpuMemoryReference* ResourceCache::findGpuMemory(Resource::Descriptor* desc, Pal
|
||||
(size > (sizeRes >> 1)) && ((it.second->iMem()->Desc().gpuVirtAddr % alignment) == 0) &&
|
||||
(entry->isAllocExecute_ == desc->isAllocExecute_) &&
|
||||
(entry->SVMRes_ == desc->SVMRes_) &&
|
||||
(entry->gl2CacheDisabled_ == desc->gl2CacheDisabled_)) {
|
||||
(entry->gl2CacheDisabled_ == desc->gl2CacheDisabled_) &&
|
||||
(entry->interprocess_ == desc->interprocess_)) {
|
||||
ref = it.second;
|
||||
cacheSize_ -= sizeRes;
|
||||
if (entry->type_ == Resource::Local) {
|
||||
|
||||
@@ -103,7 +103,8 @@ class Resource : public amd::HeapObject {
|
||||
amd::Memory* owner_; //!< Resource's owner
|
||||
VirtualGPU* gpu_; //!< Resource won't be shared between multiple queues
|
||||
const Resource* svmBase_; //!< SVM base for MGPU allocations
|
||||
CreateParams() : owner_(nullptr), gpu_(nullptr), svmBase_(nullptr) {}
|
||||
bool interprocess_; //!< Ressource can be used in the interprocess communication
|
||||
CreateParams() : owner_(nullptr), gpu_(nullptr), svmBase_(nullptr), interprocess_(false) {}
|
||||
};
|
||||
|
||||
struct PinnedParams : public CreateParams {
|
||||
@@ -176,7 +177,8 @@ class Resource : public amd::HeapObject {
|
||||
Shader, //!< resource is a shader
|
||||
P2PAccess, //!< resource is a shared resource for P2P access
|
||||
VkInterop, //!< resource is a Vulkan memory object
|
||||
VaRange //!< reousrce is a virtual address range
|
||||
VaRange, //!< reousrce is a virtual address range
|
||||
IpcMemory //!< reousrce is a IPC memory object
|
||||
};
|
||||
|
||||
//! Resource map flags
|
||||
@@ -213,6 +215,7 @@ class Resource : public amd::HeapObject {
|
||||
uint isDoppTexture_ : 1; //!< PAL resource is for a DOPP desktop texture
|
||||
uint gl2CacheDisabled_ : 1;//!< PAL resource is allocated with GPU L2 cache disabled.
|
||||
uint reserved_va_ : 1; //!< PAL resource was allocated for a reserved VA
|
||||
uint interprocess_ : 1; //!< PAL resource can be shared between processes
|
||||
};
|
||||
uint state_;
|
||||
};
|
||||
@@ -430,9 +433,17 @@ class Resource : public amd::HeapObject {
|
||||
*/
|
||||
bool CreateP2PAccess(CreateParams* params //!< special parameters for resource allocation
|
||||
);
|
||||
//! Returns an export handle for the interprocess communication
|
||||
void* ExportHandle() const;
|
||||
|
||||
protected:
|
||||
/*! \brief Creates a PAL iamge object, associated with the resource
|
||||
/*! \brief Creates a PAL memory object, from IPC handle
|
||||
*
|
||||
* \return True if we succesfully created a PAL resource
|
||||
*/
|
||||
bool CreateIpc(CreateParams* params);
|
||||
|
||||
/*! \brief Creates a PAL iamge object, associated with the resource
|
||||
*
|
||||
* \return True if we succesfully created a PAL resource
|
||||
*/
|
||||
|
||||
@@ -1048,7 +1048,7 @@ bool VirtualGPU::create(bool profiling, uint deviceQueueSize, uint rtCUs,
|
||||
dev().rgpCaptureMgr()->RegisterTimedQueue(2 * index() + 1, queue(SdmaEngine).iQueue_,
|
||||
&dbg_vmid);
|
||||
}
|
||||
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -131,6 +131,15 @@ class Os : AllStatic {
|
||||
// Given a valid mmaped ptr with correct size, unmaps the ptr from memory
|
||||
static bool MemoryUnmapFile(const void* mmap_ptr, size_t mmap_size);
|
||||
|
||||
// Given a valid filename create system memory that can be shared between processes
|
||||
static void* CreateIpcMemory(const char* fname, size_t size, FileDesc* desc);
|
||||
|
||||
// Given a valid file descriptor open IPC memory
|
||||
static void* OpenIpcMemory(const char* fname, const FileDesc desc, size_t size);
|
||||
|
||||
// Given a valid file descriptor close IPC memory
|
||||
static void CloseIpcMemory(const FileDesc desc, const void* ptr, size_t size);
|
||||
|
||||
private:
|
||||
static constexpr size_t FILE_PATH_MAX_LENGTH = 1024;
|
||||
|
||||
|
||||
@@ -910,6 +910,47 @@ int Os::getProcessId() {
|
||||
return ::getpid();
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
// Given a valid filename create system memory that can be shared between processes.
//   fname - name of the POSIX shared memory object
//   size  - size of the shared memory in bytes
//   desc  - [out] descriptor of the shared memory object; negative if nothing is returned
// Returns the address of the mapping or nullptr on any failure. On failure no descriptor
// stays open.
void* Os::CreateIpcMemory(const char* fname, size_t size, FileDesc* desc) {
  *desc = shm_open(fname, O_RDWR | O_CREAT, S_IRWXU | S_IRWXG | S_IRWXO);
  if (*desc < 0) {
    return nullptr;
  }

  // Size the object first, then map it. mmap() reports a failure with MAP_FAILED, not nullptr
  void* addr = MAP_FAILED;
  if (ftruncate(*desc, size) == 0) {
    addr = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, *desc, 0);
  }

  if (addr == MAP_FAILED) {
    // Don't leak the descriptor and don't leave a stale value behind for the caller
    close(*desc);
    *desc = -1;
    return nullptr;
  }
  return addr;
}
|
||||
|
||||
// ================================================================================================
|
||||
void* Os::OpenIpcMemory(const char* fname, const FileDesc desc, size_t size) {
|
||||
FileDesc handle = desc;
|
||||
if (fname != nullptr) {
|
||||
handle = shm_open(fname, O_RDWR, S_IRWXU|S_IRWXG|S_IRWXO);
|
||||
}
|
||||
|
||||
if (handle < 0) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto addr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, handle, 0);
|
||||
return addr;
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
// Given a valid file descriptor close IPC memory.
//   desc - descriptor to close; the value 0 means there is nothing to close
//   ptr  - address of the mapping to unmap; nullptr means there is nothing to unmap
//   size - size of the mapping in bytes
void Os::CloseIpcMemory(const FileDesc desc, const void* ptr, size_t size) {
  const bool has_mapping = (ptr != nullptr);
  const bool has_descriptor = (desc != 0);

  if (has_mapping) {
    munmap(const_cast<void*>(ptr), size);
  }
  if (has_descriptor) {
    close(desc);
  }
}
|
||||
|
||||
} // namespace amd
|
||||
|
||||
#endif // !defined(_WIN32) && !defined(__CYGWIN__)
|
||||
|
||||
@@ -945,6 +945,42 @@ int Os::getProcessId() {
|
||||
return ::_getpid();
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
void* Os::CreateIpcMemory(const char* fname, size_t size, FileDesc* desc) {
|
||||
void* addr = nullptr;
|
||||
*desc = CreateFileMapping(INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE,
|
||||
0, static_cast<DWORD>(size), fname);
|
||||
if (*desc != 0) {
|
||||
addr = MapViewOfFile(*desc, FILE_MAP_ALL_ACCESS, 0, 0, size);
|
||||
}
|
||||
|
||||
return addr;
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
void* Os::OpenIpcMemory(const char* fname, const FileDesc desc, size_t size) {
|
||||
void* addr = nullptr;
|
||||
FileDesc handle = desc;
|
||||
if (fname != nullptr) {
|
||||
handle = CreateFileMapping(desc, NULL, PAGE_READWRITE, 0, static_cast<DWORD>(size), fname);
|
||||
}
|
||||
if (handle != 0) {
|
||||
addr = MapViewOfFile(handle, FILE_MAP_ALL_ACCESS, 0, 0, size);
|
||||
}
|
||||
|
||||
return addr;
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
void Os::CloseIpcMemory(const FileDesc desc, const void* ptr, size_t size) {
|
||||
if (ptr != nullptr) {
|
||||
UnmapViewOfFile(ptr);
|
||||
}
|
||||
if (desc != nullptr) {
|
||||
CloseHandle(desc);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace amd
|
||||
|
||||
#endif // _WIN32 || __CYGWIN__
|
||||
|
||||
@@ -1535,6 +1535,14 @@ void SvmBuffer::memFill(void* dst, const void* src, size_t srcSize, size_t times
|
||||
}
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
// Reports whether Contains() recognizes the integer value of the provided pointer
bool SvmBuffer::malloced(const void* ptr) {
  const auto key = reinterpret_cast<uintptr_t>(ptr);
  return Contains(key);
}
|
||||
|
||||
// ================================================================================================
|
||||
// Points deviceMemories_ at the storage right behind this object and clears
// NumDevicesWithP2P() entries of it
void IpcBuffer::initDeviceMemory() {
  char* const trailing_storage = reinterpret_cast<char*>(this) + sizeof(IpcBuffer);
  deviceMemories_ = reinterpret_cast<DeviceMemory*>(trailing_storage);

  const size_t bytes = NumDevicesWithP2P() * sizeof(DeviceMemory);
  memset(deviceMemories_, 0, bytes);
}
|
||||
|
||||
} // namespace amd
|
||||
|
||||
@@ -37,11 +37,12 @@
|
||||
#include <unordered_map>
|
||||
#include <memory>
|
||||
#include <limits>
|
||||
#define CL_MEM_FOLLOW_USER_NUMA_POLICY (1u << 31)
|
||||
#define ROCCLR_MEM_HSA_SIGNAL_MEMORY (1u << 30)
|
||||
#define ROCCLR_MEM_INTERNAL_MEMORY (1u << 29)
|
||||
#define CL_MEM_VA_RANGE_AMD (1u << 28)
|
||||
#define ROCCLR_MEM_HSA_UNCACHED (1u << 27)
|
||||
#define CL_MEM_FOLLOW_USER_NUMA_POLICY (1u << 31)
|
||||
#define ROCCLR_MEM_HSA_SIGNAL_MEMORY (1u << 30)
|
||||
#define ROCCLR_MEM_INTERNAL_MEMORY (1u << 29)
|
||||
#define CL_MEM_VA_RANGE_AMD (1u << 28)
|
||||
#define ROCCLR_MEM_HSA_UNCACHED (1u << 27)
|
||||
#define ROCCLR_MEM_INTERPROCESS (1u << 26)
|
||||
|
||||
namespace device {
|
||||
class Memory;
|
||||
@@ -672,6 +673,21 @@ public:
|
||||
bool isArena() { return true; }
|
||||
};
|
||||
|
||||
class IpcBuffer : public Buffer {
|
||||
public:
|
||||
IpcBuffer(Context& context, Flags flags, size_t offset, size_t size, amd::Os::FileDesc handle)
|
||||
: Buffer(context, flags, offset, size), handle_(handle) {
|
||||
setIpcShared(true);
|
||||
}
|
||||
|
||||
virtual void initDeviceMemory();
|
||||
amd::Os::FileDesc Handle() const { return handle_; }
|
||||
|
||||
private:
|
||||
amd::Os::FileDesc handle_; //!< Ipc handle, associated with this memory object
|
||||
};
|
||||
|
||||
|
||||
} // namespace amd
|
||||
|
||||
#endif // MEMORY_H_
|
||||
|
||||
Ссылка в новой задаче
Block a user