SWDEV-333557 - 1. Add support for IPC memory to enable hipIpcGetMemHandle() and hipIpcOpenMemHandle() in the PAL path; set the interprocess flag for device allocations in HIP PAL. 2. Fix the PAL IPC path with ResourceCache and SubAllocation. 3. Minor fixes for the IpcBuffer constructor and IpcAttach. 4. Remove redundant checks that may go wrong.
Change-Id: Ie9d99847c7c2c7b3b2aaefaaf60d23bf71b68635
[ROCm/clr commit: 8434feed16]
This commit is contained in:
@@ -844,16 +844,12 @@ bool Device::IpcCreate(void* dev_ptr, size_t* mem_size, void* handle, size_t* me
|
||||
}
|
||||
|
||||
// Calculate the memory offset from the original base ptr
|
||||
*mem_offset = reinterpret_cast<address>(dev_ptr) - reinterpret_cast<address>(orig_dev_ptr);
|
||||
*mem_offset = reinterpret_cast<address>(dev_ptr)
|
||||
- reinterpret_cast<address>(orig_dev_ptr)
|
||||
+ amd_mem_obj->getOffset();
|
||||
|
||||
*mem_size = amd_mem_obj->getSize();
|
||||
|
||||
// Check if the dev_ptr is greater than memory allocated
|
||||
if (*mem_offset > *mem_size) {
|
||||
DevLogPrintfError(
|
||||
"Memory offset: %u cannot be greater than size of original memory allocated: %u", *mem_size,
|
||||
*mem_offset);
|
||||
return false;
|
||||
}
|
||||
auto dev_mem = static_cast<device::Memory*>(amd_mem_obj->getDeviceMemory(*this));
|
||||
auto result = dev_mem->ExportHandle(handle);
|
||||
|
||||
@@ -882,9 +878,6 @@ bool Device::IpcAttach(const void* handle, size_t mem_size, size_t mem_offset, u
|
||||
if (mem_obj_exist == nullptr) {
|
||||
// Add the original mem_ptr to the MemObjMap with newly created amd_mem_obj
|
||||
amd::MemObjMap::AddMemObj(amd_mem_obj->getSvmPtr(), amd_mem_obj);
|
||||
|
||||
// Make sure the mem_offset doesnt overflow the allocated memory
|
||||
guarantee((mem_offset < mem_size), "IPC mem offset greater than allocated size");
|
||||
} else {
|
||||
amd_mem_obj->release();
|
||||
amd_mem_obj = mem_obj_exist;
|
||||
|
||||
@@ -2266,6 +2266,11 @@ void* Device::svmAlloc(amd::Context& context, size_t size, size_t alignment, cl_
|
||||
constexpr bool kForceAllocation = true;
|
||||
alignment = std::max(alignment, static_cast<size_t>(info_.memBaseAddrAlign_));
|
||||
|
||||
if (amd::IS_HIP) {
|
||||
//set interprocess for IPC memory support
|
||||
flags |= ROCCLR_MEM_INTERPROCESS;
|
||||
}
|
||||
|
||||
amd::Memory* mem = nullptr;
|
||||
freeCPUMem_ = false;
|
||||
if (nullptr == svmPtr) {
|
||||
|
||||
@@ -1040,7 +1040,8 @@ bool Resource::CreateIpc(CreateParams* params) {
|
||||
if (nullptr == memRef_) {
|
||||
return false;
|
||||
}
|
||||
params->owner_->setSvmPtr(reinterpret_cast<void*>(memRef_->iMem()->Desc().gpuVirtAddr));
|
||||
offset_ += params->owner_->getOffset();
|
||||
params->owner_->setSvmPtr(reinterpret_cast<void*>(memRef_->iMem()->Desc().gpuVirtAddr + offset_));
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -1178,6 +1179,7 @@ bool Resource::CreateSvm(CreateParams* params, Pal::gpusize svmPtr) {
|
||||
params->owner_->setSvmPtr(
|
||||
reinterpret_cast<void*>(memRef_->iMem()->Desc().gpuVirtAddr + subOffset_));
|
||||
offset_ += static_cast<size_t>(subOffset_);
|
||||
params->owner_->setOffset(offset_);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@@ -2055,6 +2057,10 @@ bool CoarseMemorySubAllocator::CreateChunk(const Pal::IGpuMemory* reserved_va) {
|
||||
createInfo.heaps[0] = Pal::GpuHeapInvisible;
|
||||
createInfo.heaps[1] = Pal::GpuHeapLocal;
|
||||
createInfo.mallPolicy = static_cast<Pal::GpuMemMallPolicy>(device_->settings().mallPolicy_);
|
||||
if (amd::IS_HIP) {
|
||||
//set interprocess for IPC memory support
|
||||
createInfo.flags.interprocess = 1;
|
||||
}
|
||||
GpuMemoryReference* mem_ref = GpuMemoryReference::Create(*device_, createInfo);
|
||||
if (mem_ref != nullptr) {
|
||||
// Workaround: some chunk memory are not guaranteed to be resident during initial allocation.
|
||||
|
||||
@@ -94,6 +94,7 @@ Memory::Memory(Context& context, Type type, Flags flags, size_t size, void* svmP
|
||||
vDev_(NULL),
|
||||
mapCount_(0),
|
||||
svmHostAddress_(svmPtr),
|
||||
resOffset_(0),
|
||||
flagsEx_(0),
|
||||
lockMemoryOps_("Memory Ops Lock", true) {
|
||||
svmPtrCommited_ = (flags & CL_MEM_SVM_FINE_GRAIN_BUFFER) ? true : false;
|
||||
@@ -117,6 +118,7 @@ Memory::Memory(Memory& parent, Flags flags, size_t origin, size_t size, Type typ
|
||||
vDev_(NULL),
|
||||
mapCount_(0),
|
||||
svmHostAddress_(parent.getSvmPtr()),
|
||||
resOffset_(0),
|
||||
flagsEx_(0),
|
||||
lockMemoryOps_("Memory Ops Lock", true) {
|
||||
svmPtrCommited_ = parent.isSvmPtrCommited();
|
||||
|
||||
@@ -194,6 +194,7 @@ class Memory : public amd::RuntimeObject {
|
||||
device::VirtualDevice* vDev_; //!< Memory object belongs to a virtual device only
|
||||
std::atomic_uint mapCount_; //!< Keep track of number of mappings for a memory object
|
||||
void* svmHostAddress_; //!< svm host address;
|
||||
size_t resOffset_; //!< resource offset
|
||||
union {
|
||||
struct {
|
||||
uint32_t isParent_ : 1; //!< This object is a parent
|
||||
@@ -372,6 +373,10 @@ class Memory : public amd::RuntimeObject {
|
||||
|
||||
void* getSvmPtr() const { return svmHostAddress_; } //!< svm pointer accessor;
|
||||
void setSvmPtr(void* ptr) { svmHostAddress_ = ptr; } //!< svm pointer setter;
|
||||
|
||||
size_t getOffset() const { return resOffset_; } //!< resource offset accessor;
|
||||
void setOffset(size_t offset) { resOffset_ = offset; } //!< resource offset setter;
|
||||
|
||||
bool isSvmPtrCommited() const {
|
||||
return svmPtrCommited_;
|
||||
} //!< svm host address committed accessor;
|
||||
@@ -679,8 +684,9 @@ public:
|
||||
class IpcBuffer : public Buffer {
|
||||
public:
|
||||
IpcBuffer(Context& context, Flags flags, size_t offset, size_t size, const void* handle)
|
||||
: Buffer(context, flags, offset, size), handle_(handle) {
|
||||
: Buffer(context, flags, size), handle_(handle) {
|
||||
setIpcShared(true);
|
||||
setOffset(offset);
|
||||
}
|
||||
|
||||
virtual void initDeviceMemory();
|
||||
|
||||
Reference in New Issue
Block a user