SWDEV-511204 - Mapped virtual memory should use device instead of host context (#213)
Since the sub-buffer(virtual memory that is mapped to device memory) is associated with device memory, it should utilize the device context instead of the host context. The original implementation caused hipMemcpyPeer to not take the P2P path, as the memory object was treated as host memory.
[ROCm/clr commit: a7492c516d]
Este cometimento está contido em:
cometido por
GitHub
ascendente
5bd5f3aa6e
cometimento
558a26cf98
@@ -539,7 +539,8 @@ hipError_t ihipMemcpyCommand(amd::Command*& command, void* dst, const void* src,
|
||||
hip::Stream* pStream = &stream;
|
||||
switch (type) {
|
||||
case hipWriteBuffer:
|
||||
if (queueDevice != dstMemory->GetDeviceById()) {
|
||||
if (queueDevice != dstMemory->GetDeviceById() &&
|
||||
!(dstMemory->getMemFlags() & CL_MEM_VA_RANGE_AMD)) {
|
||||
pStream = hip::getNullStream(dstMemory->GetDeviceById()->context());
|
||||
amd::Command* cmd = stream.getLastQueuedCommand(true);
|
||||
if (cmd != nullptr) {
|
||||
@@ -551,7 +552,8 @@ hipError_t ihipMemcpyCommand(amd::Command*& command, void* dst, const void* src,
|
||||
copyMetadata);
|
||||
break;
|
||||
case hipReadBuffer:
|
||||
if (queueDevice != srcMemory->GetDeviceById()) {
|
||||
if (queueDevice != srcMemory->GetDeviceById() &&
|
||||
!(srcMemory->getMemFlags() & CL_MEM_VA_RANGE_AMD)) {
|
||||
pStream = hip::getNullStream(srcMemory->GetDeviceById()->context());
|
||||
amd::Command* cmd = stream.getLastQueuedCommand(true);
|
||||
if (cmd != nullptr) {
|
||||
@@ -589,7 +591,8 @@ hipError_t ihipMemcpyCommand(amd::Command*& command, void* dst, const void* src,
|
||||
} else if (srcMemory->GetDeviceById() != dstMemory->GetDeviceById()) {
|
||||
// Scenarios such as DtoH where dst is pinned memory
|
||||
if ((queueDevice != srcMemory->GetDeviceById()) &&
|
||||
(dstMemory->getContext().devices().size() != 1)) {
|
||||
(dstMemory->getContext().devices().size() != 1) &&
|
||||
!(srcMemory->getMemFlags() & CL_MEM_VA_RANGE_AMD)) {
|
||||
pStream = hip::getNullStream(srcMemory->GetDeviceById()->context());
|
||||
amd::Command* cmd = stream.getLastQueuedCommand(true);
|
||||
if (cmd != nullptr) {
|
||||
@@ -597,7 +600,8 @@ hipError_t ihipMemcpyCommand(amd::Command*& command, void* dst, const void* src,
|
||||
}
|
||||
// Scenarios such as HtoD where src is pinned memory
|
||||
} else if ((queueDevice != dstMemory->GetDeviceById()) &&
|
||||
(srcMemory->getContext().devices().size() != 1)) {
|
||||
(srcMemory->getContext().devices().size() != 1) &&
|
||||
!(dstMemory->getMemFlags() & CL_MEM_VA_RANGE_AMD)) {
|
||||
pStream = hip::getNullStream(dstMemory->GetDeviceById()->context());
|
||||
amd::Command* cmd = stream.getLastQueuedCommand(true);
|
||||
if (cmd != nullptr) {
|
||||
|
||||
@@ -537,10 +537,12 @@ amd::Memory* Device::CreateVirtualBuffer(amd::Context& device_context, void* vpt
|
||||
}
|
||||
assert(vaddr_base_obj->getMemFlags() & CL_MEM_VA_RANGE_AMD);
|
||||
|
||||
size_t offset = (reinterpret_cast<address>(vptr)
|
||||
- reinterpret_cast<address>(vaddr_base_obj->getSvmPtr()));
|
||||
Context& ctx = vaddr_base_obj->getContext();
|
||||
vaddr_sub_obj = new (ctx) amd::Buffer(*vaddr_base_obj,CL_MEM_VA_RANGE_AMD, offset, size);
|
||||
size_t offset =
|
||||
(reinterpret_cast<address>(vptr) - reinterpret_cast<address>(vaddr_base_obj->getSvmPtr()));
|
||||
vaddr_sub_obj =
|
||||
new (device_context) amd::Buffer(device_context, CL_MEM_VA_RANGE_AMD, size, vptr);
|
||||
vaddr_sub_obj->SetParent(vaddr_base_obj);
|
||||
vaddr_sub_obj->setOrigin(offset);
|
||||
|
||||
// This curr_mem_obj->create() does not create an actual memory but stores the memory info
|
||||
// with given vptr on ROCr backend.
|
||||
|
||||
@@ -329,11 +329,18 @@ class Memory : public amd::RuntimeObject {
|
||||
|
||||
// Accessors
|
||||
Memory* parent() const { return parent_; }
|
||||
void SetParent(amd::Memory* parent) { parent_ = parent; }
|
||||
void SetParent(amd::Memory* parent) {
|
||||
parent_ = parent;
|
||||
if (parent != nullptr) {
|
||||
parent_->isParent_ = true;
|
||||
parent_->retain();
|
||||
}
|
||||
}
|
||||
bool isParent() const { return isParent_; }
|
||||
bool ImageView() const { return image_view_; }
|
||||
|
||||
size_t getOrigin() const { return origin_; }
|
||||
void setOrigin(size_t origin) { origin_ = origin; }
|
||||
size_t getSize() const { return size_; }
|
||||
Flags getMemFlags() const { return flags_; }
|
||||
Type getType() const { return type_; }
|
||||
@@ -721,4 +728,3 @@ class IpcBuffer : public Buffer {
|
||||
};
|
||||
|
||||
} // namespace amd
|
||||
|
||||
|
||||
Criar uma nova questão referindo esta
Bloquear um utilizador