P4 to Git Change 1771336 by cpaquot@cpaquot-ocl-lc-lnx on 2019/04/17 18:19:42

SWDEV-145570 - [HIP] Use staging buffer to copy peer to peer.

Affected files ...

... //depot/stg/opencl/drivers/opencl/api/hip/hip_context.cpp#18 edit
... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#27 edit
... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#53 edit
这个提交包含在:
foreman
2019-04-17 18:38:30 -04:00
父节点 9b36b245c9
当前提交 9f5001466a
修改 3 个文件,包含 57 行新增5 行删除
+9 -5
查看文件
@@ -75,21 +75,25 @@ void setCurrentContext(unsigned int index) {
g_context = g_devices[index];
}
amd::HostQueue* getNullStream() {
auto stream = g_nullStreams.find(getCurrentContext());
amd::HostQueue* getNullStream(amd::Context& context) {
auto stream = g_nullStreams.find(&context);
if (stream == g_nullStreams.end()) {
amd::Device* device = getCurrentContext()->devices()[0];
amd::Device* device = context.devices()[0];
cl_command_queue_properties properties = CL_QUEUE_PROFILING_ENABLE;
amd::HostQueue* queue = new amd::HostQueue(*hip::getCurrentContext(), *device, properties,
amd::HostQueue* queue = new amd::HostQueue(context, *device, properties,
amd::CommandQueue::RealTimeDisabled,
amd::CommandQueue::Priority::Normal);
g_nullStreams[getCurrentContext()] = queue;
g_nullStreams[&context] = queue;
return queue;
}
syncStreams();
return stream->second;
}
amd::HostQueue* getNullStream() {
return getNullStream(*getCurrentContext());
}
};
using namespace hip;
+1
查看文件
@@ -72,6 +72,7 @@ namespace hip {
extern amd::Context* getCurrentContext();
extern void setCurrentContext(unsigned int index);
extern amd::HostQueue* getNullStream(amd::Context&);
extern amd::HostQueue* getNullStream();
extern void syncStreams();
+47
查看文件
@@ -103,6 +103,53 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin
command = new amd::ReadMemoryCommand(queue, CL_COMMAND_READ_BUFFER, waitList,
*srcMemory->asBuffer(), sOffset, sizeBytes, dst);
} else if ((srcMemory != nullptr) && (dstMemory != nullptr)) {
static const uint hostMem = CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_USE_HOST_PTR;
if ((kind == hipMemcpyDeviceToDevice ||
kind == hipMemcpyDefault) &&
((srcMemory->getMemFlags() & hostMem) == 0) &&
((dstMemory->getMemFlags() & hostMem) == 0)) {
amd::Device* queueDevice = &queue.device();
if (queueDevice != srcMemory->getContext().devices()[0]) {
void* staging = nullptr;
ihipMalloc(&staging, sizeBytes, CL_MEM_SVM_FINE_GRAIN_BUFFER);
ihipMemcpy(staging, src, sizeBytes, hipMemcpyDeviceToHost, *hip::getNullStream(srcMemory->getContext()));
ihipMemcpy(dst, staging, sizeBytes, hipMemcpyHostToDevice, queue);
hipFree(staging);
#if 0
amd::Coord3D srcOffset(sOffset, 0, 0);
amd::Coord3D dstOffset(dOffset, 0, 0);
amd::Coord3D copySize(sizeBytes, 1, 1);
command = new amd::CopyMemoryP2PCommand(queue, CL_COMMAND_COPY_BUFFER, waitList,
*srcMemory->asBuffer(),*dstMemory->asBuffer(), srcOffset, dstOffset, copySize);
command->enqueue();
if (!isAsync) {
command->awaitCompletion();
}
command->release();
#endif
return hipSuccess;
}
if (queueDevice != dstMemory->getContext().devices()[0]) {
void* staging = nullptr;
ihipMalloc(&staging, sizeBytes, CL_MEM_SVM_FINE_GRAIN_BUFFER);
ihipMemcpy(staging, src, sizeBytes, hipMemcpyDeviceToHost, queue);
ihipMemcpy(dst, staging, sizeBytes, hipMemcpyHostToDevice, *hip::getNullStream(dstMemory->getContext()));
hipFree(staging);
#if 0
amd::Coord3D srcOffset(sOffset, 0, 0);
amd::Coord3D dstOffset(dOffset, 0, 0);
amd::Coord3D copySize(sizeBytes, 1, 1);
command = new amd::CopyMemoryP2PCommand(queue, CL_COMMAND_COPY_BUFFER, waitList,
*srcMemory->asBuffer(),*dstMemory->asBuffer(), srcOffset, dstOffset, copySize);
command->enqueue();
if (!isAsync) {
command->awaitCompletion();
}
command->release();
#endif
return hipSuccess;
}
}
command = new amd::CopyMemoryCommand(queue, CL_COMMAND_COPY_BUFFER, waitList,
*srcMemory->asBuffer(),*dstMemory->asBuffer(), sOffset, dOffset, sizeBytes);
}