From 9f5001466a7c5bc69cb2cdd5583d217c3fbf6416 Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 17 Apr 2019 18:38:30 -0400 Subject: [PATCH] P4 to Git Change 1771336 by cpaquot@cpaquot-ocl-lc-lnx on 2019/04/17 18:19:42 SWDEV-145570 - [HIP] Use staging buffer to copy peer to peer. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_context.cpp#18 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#27 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#53 edit --- hipamd/api/hip/hip_context.cpp | 14 ++++++---- hipamd/api/hip/hip_internal.hpp | 1 + hipamd/api/hip/hip_memory.cpp | 47 +++++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 5 deletions(-) diff --git a/hipamd/api/hip/hip_context.cpp b/hipamd/api/hip/hip_context.cpp index 02b57f0316..47bf651b8c 100644 --- a/hipamd/api/hip/hip_context.cpp +++ b/hipamd/api/hip/hip_context.cpp @@ -75,21 +75,25 @@ void setCurrentContext(unsigned int index) { g_context = g_devices[index]; } -amd::HostQueue* getNullStream() { - auto stream = g_nullStreams.find(getCurrentContext()); +amd::HostQueue* getNullStream(amd::Context& context) { + auto stream = g_nullStreams.find(&context); if (stream == g_nullStreams.end()) { - amd::Device* device = getCurrentContext()->devices()[0]; + amd::Device* device = context.devices()[0]; cl_command_queue_properties properties = CL_QUEUE_PROFILING_ENABLE; - amd::HostQueue* queue = new amd::HostQueue(*hip::getCurrentContext(), *device, properties, + amd::HostQueue* queue = new amd::HostQueue(context, *device, properties, amd::CommandQueue::RealTimeDisabled, amd::CommandQueue::Priority::Normal); - g_nullStreams[getCurrentContext()] = queue; + g_nullStreams[&context] = queue; return queue; } syncStreams(); return stream->second; } +amd::HostQueue* getNullStream() { + return getNullStream(*getCurrentContext()); +} + }; using namespace hip; diff --git a/hipamd/api/hip/hip_internal.hpp b/hipamd/api/hip/hip_internal.hpp index f7266ba82f..ff5dc1673b 100644 --- a/hipamd/api/hip/hip_internal.hpp +++ b/hipamd/api/hip/hip_internal.hpp @@ -72,6 +72,7 @@ namespace hip { extern amd::Context* getCurrentContext(); extern void setCurrentContext(unsigned int index); + extern amd::HostQueue* getNullStream(amd::Context&); extern amd::HostQueue* getNullStream(); extern void syncStreams(); diff --git a/hipamd/api/hip/hip_memory.cpp b/hipamd/api/hip/hip_memory.cpp index ab33573d72..0ae9df3fc3 100644 --- a/hipamd/api/hip/hip_memory.cpp +++ b/hipamd/api/hip/hip_memory.cpp @@ -103,6 +103,53 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin command = new amd::ReadMemoryCommand(queue, CL_COMMAND_READ_BUFFER, waitList, *srcMemory->asBuffer(), sOffset, sizeBytes, dst); } else if ((srcMemory != nullptr) && (dstMemory != nullptr)) { + static const uint hostMem = CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_USE_HOST_PTR; + if ((kind == hipMemcpyDeviceToDevice || + kind == hipMemcpyDefault) && + ((srcMemory->getMemFlags() & hostMem) == 0) && + ((dstMemory->getMemFlags() & hostMem) == 0)) { + amd::Device* queueDevice = &queue.device(); + if (queueDevice != srcMemory->getContext().devices()[0]) { + void* staging = nullptr; + ihipMalloc(&staging, sizeBytes, CL_MEM_SVM_FINE_GRAIN_BUFFER); + ihipMemcpy(staging, src, sizeBytes, hipMemcpyDeviceToHost, *hip::getNullStream(srcMemory->getContext())); + ihipMemcpy(dst, staging, sizeBytes, hipMemcpyHostToDevice, queue); + hipFree(staging); +#if 0 + amd::Coord3D srcOffset(sOffset, 0, 0); + amd::Coord3D dstOffset(dOffset, 0, 0); + amd::Coord3D copySize(sizeBytes, 1, 1); + command = new amd::CopyMemoryP2PCommand(queue, CL_COMMAND_COPY_BUFFER, waitList, + *srcMemory->asBuffer(),*dstMemory->asBuffer(), srcOffset, dstOffset, copySize); + command->enqueue(); + if (!isAsync) { + command->awaitCompletion(); + } + command->release(); +#endif + return hipSuccess; + } + if (queueDevice != dstMemory->getContext().devices()[0]) { + void* staging = nullptr; + ihipMalloc(&staging, sizeBytes, CL_MEM_SVM_FINE_GRAIN_BUFFER); + ihipMemcpy(staging, src, sizeBytes, hipMemcpyDeviceToHost, queue); + ihipMemcpy(dst, staging, sizeBytes, hipMemcpyHostToDevice, *hip::getNullStream(dstMemory->getContext())); + hipFree(staging); +#if 0 + amd::Coord3D srcOffset(sOffset, 0, 0); + amd::Coord3D dstOffset(dOffset, 0, 0); + amd::Coord3D copySize(sizeBytes, 1, 1); + command = new amd::CopyMemoryP2PCommand(queue, CL_COMMAND_COPY_BUFFER, waitList, + *srcMemory->asBuffer(),*dstMemory->asBuffer(), srcOffset, dstOffset, copySize); + command->enqueue(); + if (!isAsync) { + command->awaitCompletion(); + } + command->release(); +#endif + return hipSuccess; + } + } command = new amd::CopyMemoryCommand(queue, CL_COMMAND_COPY_BUFFER, waitList, *srcMemory->asBuffer(),*dstMemory->asBuffer(), sOffset, dOffset, sizeBytes); }