P4 to Git Change 1771336 by cpaquot@cpaquot-ocl-lc-lnx on 2019/04/17 18:19:42
SWDEV-145570 - [HIP] Use staging buffer to copy peer to peer. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_context.cpp#18 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#27 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#53 edit
这个提交包含在:
@@ -75,21 +75,25 @@ void setCurrentContext(unsigned int index) {
|
||||
g_context = g_devices[index];
|
||||
}
|
||||
|
||||
amd::HostQueue* getNullStream() {
|
||||
auto stream = g_nullStreams.find(getCurrentContext());
|
||||
amd::HostQueue* getNullStream(amd::Context& context) {
|
||||
auto stream = g_nullStreams.find(&context);
|
||||
if (stream == g_nullStreams.end()) {
|
||||
amd::Device* device = getCurrentContext()->devices()[0];
|
||||
amd::Device* device = context.devices()[0];
|
||||
cl_command_queue_properties properties = CL_QUEUE_PROFILING_ENABLE;
|
||||
amd::HostQueue* queue = new amd::HostQueue(*hip::getCurrentContext(), *device, properties,
|
||||
amd::HostQueue* queue = new amd::HostQueue(context, *device, properties,
|
||||
amd::CommandQueue::RealTimeDisabled,
|
||||
amd::CommandQueue::Priority::Normal);
|
||||
g_nullStreams[getCurrentContext()] = queue;
|
||||
g_nullStreams[&context] = queue;
|
||||
return queue;
|
||||
}
|
||||
syncStreams();
|
||||
return stream->second;
|
||||
}
|
||||
|
||||
amd::HostQueue* getNullStream() {
|
||||
return getNullStream(*getCurrentContext());
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
using namespace hip;
|
||||
|
||||
@@ -72,6 +72,7 @@ namespace hip {
|
||||
extern amd::Context* getCurrentContext();
|
||||
extern void setCurrentContext(unsigned int index);
|
||||
|
||||
extern amd::HostQueue* getNullStream(amd::Context&);
|
||||
extern amd::HostQueue* getNullStream();
|
||||
extern void syncStreams();
|
||||
|
||||
|
||||
@@ -103,6 +103,53 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin
|
||||
command = new amd::ReadMemoryCommand(queue, CL_COMMAND_READ_BUFFER, waitList,
|
||||
*srcMemory->asBuffer(), sOffset, sizeBytes, dst);
|
||||
} else if ((srcMemory != nullptr) && (dstMemory != nullptr)) {
|
||||
static const uint hostMem = CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_USE_HOST_PTR;
|
||||
if ((kind == hipMemcpyDeviceToDevice ||
|
||||
kind == hipMemcpyDefault) &&
|
||||
((srcMemory->getMemFlags() & hostMem) == 0) &&
|
||||
((dstMemory->getMemFlags() & hostMem) == 0)) {
|
||||
amd::Device* queueDevice = &queue.device();
|
||||
if (queueDevice != srcMemory->getContext().devices()[0]) {
|
||||
void* staging = nullptr;
|
||||
ihipMalloc(&staging, sizeBytes, CL_MEM_SVM_FINE_GRAIN_BUFFER);
|
||||
ihipMemcpy(staging, src, sizeBytes, hipMemcpyDeviceToHost, *hip::getNullStream(srcMemory->getContext()));
|
||||
ihipMemcpy(dst, staging, sizeBytes, hipMemcpyHostToDevice, queue);
|
||||
hipFree(staging);
|
||||
#if 0
|
||||
amd::Coord3D srcOffset(sOffset, 0, 0);
|
||||
amd::Coord3D dstOffset(dOffset, 0, 0);
|
||||
amd::Coord3D copySize(sizeBytes, 1, 1);
|
||||
command = new amd::CopyMemoryP2PCommand(queue, CL_COMMAND_COPY_BUFFER, waitList,
|
||||
*srcMemory->asBuffer(),*dstMemory->asBuffer(), srcOffset, dstOffset, copySize);
|
||||
command->enqueue();
|
||||
if (!isAsync) {
|
||||
command->awaitCompletion();
|
||||
}
|
||||
command->release();
|
||||
#endif
|
||||
return hipSuccess;
|
||||
}
|
||||
if (queueDevice != dstMemory->getContext().devices()[0]) {
|
||||
void* staging = nullptr;
|
||||
ihipMalloc(&staging, sizeBytes, CL_MEM_SVM_FINE_GRAIN_BUFFER);
|
||||
ihipMemcpy(staging, src, sizeBytes, hipMemcpyDeviceToHost, queue);
|
||||
ihipMemcpy(dst, staging, sizeBytes, hipMemcpyHostToDevice, *hip::getNullStream(dstMemory->getContext()));
|
||||
hipFree(staging);
|
||||
#if 0
|
||||
amd::Coord3D srcOffset(sOffset, 0, 0);
|
||||
amd::Coord3D dstOffset(dOffset, 0, 0);
|
||||
amd::Coord3D copySize(sizeBytes, 1, 1);
|
||||
command = new amd::CopyMemoryP2PCommand(queue, CL_COMMAND_COPY_BUFFER, waitList,
|
||||
*srcMemory->asBuffer(),*dstMemory->asBuffer(), srcOffset, dstOffset, copySize);
|
||||
command->enqueue();
|
||||
if (!isAsync) {
|
||||
command->awaitCompletion();
|
||||
}
|
||||
command->release();
|
||||
#endif
|
||||
return hipSuccess;
|
||||
}
|
||||
}
|
||||
command = new amd::CopyMemoryCommand(queue, CL_COMMAND_COPY_BUFFER, waitList,
|
||||
*srcMemory->asBuffer(),*dstMemory->asBuffer(), sOffset, dOffset, sizeBytes);
|
||||
}
|
||||
|
||||
在新工单中引用
屏蔽一个用户