From 9f5001466a7c5bc69cb2cdd5583d217c3fbf6416 Mon Sep 17 00:00:00 2001
From: foreman
Date: Wed, 17 Apr 2019 18:38:30 -0400
Subject: [PATCH] P4 to Git Change 1771336 by cpaquot@cpaquot-ocl-lc-lnx on
2019/04/17 18:19:42
SWDEV-145570 - [HIP] Use staging buffer to copy peer to peer.
Affected files ...
... //depot/stg/opencl/drivers/opencl/api/hip/hip_context.cpp#18 edit
... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#27 edit
... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#53 edit
---
hipamd/api/hip/hip_context.cpp | 14 ++++++----
hipamd/api/hip/hip_internal.hpp | 1 +
hipamd/api/hip/hip_memory.cpp | 47 +++++++++++++++++++++++++++++++++
3 files changed, 57 insertions(+), 5 deletions(-)
diff --git a/hipamd/api/hip/hip_context.cpp b/hipamd/api/hip/hip_context.cpp
index 02b57f0316..47bf651b8c 100644
--- a/hipamd/api/hip/hip_context.cpp
+++ b/hipamd/api/hip/hip_context.cpp
@@ -75,21 +75,25 @@ void setCurrentContext(unsigned int index) {
g_context = g_devices[index];
}
-amd::HostQueue* getNullStream() {
- auto stream = g_nullStreams.find(getCurrentContext());
+amd::HostQueue* getNullStream(amd::Context& context) {
+ auto stream = g_nullStreams.find(&context);
if (stream == g_nullStreams.end()) {
- amd::Device* device = getCurrentContext()->devices()[0];
+ amd::Device* device = context.devices()[0];
cl_command_queue_properties properties = CL_QUEUE_PROFILING_ENABLE;
- amd::HostQueue* queue = new amd::HostQueue(*hip::getCurrentContext(), *device, properties,
+ amd::HostQueue* queue = new amd::HostQueue(context, *device, properties,
amd::CommandQueue::RealTimeDisabled,
amd::CommandQueue::Priority::Normal);
- g_nullStreams[getCurrentContext()] = queue;
+ g_nullStreams[&context] = queue;
return queue;
}
syncStreams();
return stream->second;
}
+amd::HostQueue* getNullStream() {
+ return getNullStream(*getCurrentContext());
+}
+
};
using namespace hip;
diff --git a/hipamd/api/hip/hip_internal.hpp b/hipamd/api/hip/hip_internal.hpp
index f7266ba82f..ff5dc1673b 100644
--- a/hipamd/api/hip/hip_internal.hpp
+++ b/hipamd/api/hip/hip_internal.hpp
@@ -72,6 +72,7 @@ namespace hip {
extern amd::Context* getCurrentContext();
extern void setCurrentContext(unsigned int index);
+ extern amd::HostQueue* getNullStream(amd::Context&);
extern amd::HostQueue* getNullStream();
extern void syncStreams();
diff --git a/hipamd/api/hip/hip_memory.cpp b/hipamd/api/hip/hip_memory.cpp
index ab33573d72..0ae9df3fc3 100644
--- a/hipamd/api/hip/hip_memory.cpp
+++ b/hipamd/api/hip/hip_memory.cpp
@@ -103,6 +103,53 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin
command = new amd::ReadMemoryCommand(queue, CL_COMMAND_READ_BUFFER, waitList,
*srcMemory->asBuffer(), sOffset, sizeBytes, dst);
} else if ((srcMemory != nullptr) && (dstMemory != nullptr)) {
+ static const uint hostMem = CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_USE_HOST_PTR;
+ if ((kind == hipMemcpyDeviceToDevice ||
+ kind == hipMemcpyDefault) &&
+ ((srcMemory->getMemFlags() & hostMem) == 0) &&
+ ((dstMemory->getMemFlags() & hostMem) == 0)) {
+ amd::Device* queueDevice = &queue.device();
+ if (queueDevice != srcMemory->getContext().devices()[0]) {
+ void* staging = nullptr;
+ ihipMalloc(&staging, sizeBytes, CL_MEM_SVM_FINE_GRAIN_BUFFER);
+ ihipMemcpy(staging, src, sizeBytes, hipMemcpyDeviceToHost, *hip::getNullStream(srcMemory->getContext()));
+ ihipMemcpy(dst, staging, sizeBytes, hipMemcpyHostToDevice, queue);
+ hipFree(staging);
+#if 0
+ amd::Coord3D srcOffset(sOffset, 0, 0);
+ amd::Coord3D dstOffset(dOffset, 0, 0);
+ amd::Coord3D copySize(sizeBytes, 1, 1);
+ command = new amd::CopyMemoryP2PCommand(queue, CL_COMMAND_COPY_BUFFER, waitList,
+ *srcMemory->asBuffer(),*dstMemory->asBuffer(), srcOffset, dstOffset, copySize);
+ command->enqueue();
+ if (!isAsync) {
+ command->awaitCompletion();
+ }
+ command->release();
+#endif
+ return hipSuccess;
+ }
+ if (queueDevice != dstMemory->getContext().devices()[0]) {
+ void* staging = nullptr;
+ ihipMalloc(&staging, sizeBytes, CL_MEM_SVM_FINE_GRAIN_BUFFER);
+ ihipMemcpy(staging, src, sizeBytes, hipMemcpyDeviceToHost, queue);
+ ihipMemcpy(dst, staging, sizeBytes, hipMemcpyHostToDevice, *hip::getNullStream(dstMemory->getContext()));
+ hipFree(staging);
+#if 0
+ amd::Coord3D srcOffset(sOffset, 0, 0);
+ amd::Coord3D dstOffset(dOffset, 0, 0);
+ amd::Coord3D copySize(sizeBytes, 1, 1);
+ command = new amd::CopyMemoryP2PCommand(queue, CL_COMMAND_COPY_BUFFER, waitList,
+ *srcMemory->asBuffer(),*dstMemory->asBuffer(), srcOffset, dstOffset, copySize);
+ command->enqueue();
+ if (!isAsync) {
+ command->awaitCompletion();
+ }
+ command->release();
+#endif
+ return hipSuccess;
+ }
+ }
command = new amd::CopyMemoryCommand(queue, CL_COMMAND_COPY_BUFFER, waitList,
*srcMemory->asBuffer(),*dstMemory->asBuffer(), sOffset, dOffset, sizeBytes);
}