2
0

SWDEV-511204 - Mapped virtual memory should use device instead of host context (#213)

Since the sub-buffer(virtual memory that is mapped to device memory) is associated with device memory, it should utilize the device context instead of the host context. The original implementation caused hipMemcpyPeer to not take the P2P path, as the memory object was treated as host memory.

[ROCm/clr commit: a7492c516d]
Este cometimento está contido em:
Arandjelovic, Marko
2025-05-12 16:55:25 +02:00
cometido por GitHub
ascendente 5bd5f3aa6e
cometimento 558a26cf98
3 ficheiros modificados com 22 adições e 10 eliminações
+8 -4
Ver ficheiro
@@ -539,7 +539,8 @@ hipError_t ihipMemcpyCommand(amd::Command*& command, void* dst, const void* src,
hip::Stream* pStream = &stream;
switch (type) {
case hipWriteBuffer:
if (queueDevice != dstMemory->GetDeviceById()) {
if (queueDevice != dstMemory->GetDeviceById() &&
!(dstMemory->getMemFlags() & CL_MEM_VA_RANGE_AMD)) {
pStream = hip::getNullStream(dstMemory->GetDeviceById()->context());
amd::Command* cmd = stream.getLastQueuedCommand(true);
if (cmd != nullptr) {
@@ -551,7 +552,8 @@ hipError_t ihipMemcpyCommand(amd::Command*& command, void* dst, const void* src,
copyMetadata);
break;
case hipReadBuffer:
if (queueDevice != srcMemory->GetDeviceById()) {
if (queueDevice != srcMemory->GetDeviceById() &&
!(srcMemory->getMemFlags() & CL_MEM_VA_RANGE_AMD)) {
pStream = hip::getNullStream(srcMemory->GetDeviceById()->context());
amd::Command* cmd = stream.getLastQueuedCommand(true);
if (cmd != nullptr) {
@@ -589,7 +591,8 @@ hipError_t ihipMemcpyCommand(amd::Command*& command, void* dst, const void* src,
} else if (srcMemory->GetDeviceById() != dstMemory->GetDeviceById()) {
// Scenarios such as DtoH where dst is pinned memory
if ((queueDevice != srcMemory->GetDeviceById()) &&
(dstMemory->getContext().devices().size() != 1)) {
(dstMemory->getContext().devices().size() != 1) &&
!(srcMemory->getMemFlags() & CL_MEM_VA_RANGE_AMD)) {
pStream = hip::getNullStream(srcMemory->GetDeviceById()->context());
amd::Command* cmd = stream.getLastQueuedCommand(true);
if (cmd != nullptr) {
@@ -597,7 +600,8 @@ hipError_t ihipMemcpyCommand(amd::Command*& command, void* dst, const void* src,
}
// Scenarios such as HtoD where src is pinned memory
} else if ((queueDevice != dstMemory->GetDeviceById()) &&
(srcMemory->getContext().devices().size() != 1)) {
(srcMemory->getContext().devices().size() != 1) &&
!(dstMemory->getMemFlags() & CL_MEM_VA_RANGE_AMD)) {
pStream = hip::getNullStream(dstMemory->GetDeviceById()->context());
amd::Command* cmd = stream.getLastQueuedCommand(true);
if (cmd != nullptr) {
+6 -4
Ver ficheiro
@@ -537,10 +537,12 @@ amd::Memory* Device::CreateVirtualBuffer(amd::Context& device_context, void* vpt
}
assert(vaddr_base_obj->getMemFlags() & CL_MEM_VA_RANGE_AMD);
size_t offset = (reinterpret_cast<address>(vptr)
- reinterpret_cast<address>(vaddr_base_obj->getSvmPtr()));
Context& ctx = vaddr_base_obj->getContext();
vaddr_sub_obj = new (ctx) amd::Buffer(*vaddr_base_obj,CL_MEM_VA_RANGE_AMD, offset, size);
size_t offset =
(reinterpret_cast<address>(vptr) - reinterpret_cast<address>(vaddr_base_obj->getSvmPtr()));
vaddr_sub_obj =
new (device_context) amd::Buffer(device_context, CL_MEM_VA_RANGE_AMD, size, vptr);
vaddr_sub_obj->SetParent(vaddr_base_obj);
vaddr_sub_obj->setOrigin(offset);
// This curr_mem_obj->create() does not create an actual memory but stores the memory info
// with given vptr on ROCr backend.
+8 -2
Ver ficheiro
@@ -329,11 +329,18 @@ class Memory : public amd::RuntimeObject {
// Accessors
Memory* parent() const { return parent_; }
void SetParent(amd::Memory* parent) { parent_ = parent; }
void SetParent(amd::Memory* parent) {
parent_ = parent;
if (parent != nullptr) {
parent_->isParent_ = true;
parent_->retain();
}
}
bool isParent() const { return isParent_; }
bool ImageView() const { return image_view_; }
size_t getOrigin() const { return origin_; }
void setOrigin(size_t origin) { origin_ = origin; }
size_t getSize() const { return size_; }
Flags getMemFlags() const { return flags_; }
Type getType() const { return type_; }
@@ -721,4 +728,3 @@ class IpcBuffer : public Buffer {
};
} // namespace amd