P4 to Git Change 1323915 by gandryey@gera-w8 on 2016/10/07 12:59:30
SWDEV-104441 - [SSG] OpenCL has not implemented the asynchronous transfer
- Use lock protection for multiple maps of persistent memory
- Don't mark persistent memory as host memory
- Implement file write for invisible memory
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpumemory.cpp#129 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuresource.cpp#236 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#408 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.cpp#13 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/command.cpp#77 edit
[ROCm/clr commit: 9d739c200c]
Bu işleme şunda yer alıyor:
@@ -171,7 +171,6 @@ Memory::create(
|
||||
// Check if CAL created a resource
|
||||
if (result) {
|
||||
switch (memoryType()) {
|
||||
case Resource::Persistent:
|
||||
case Resource::Pinned:
|
||||
case Resource::ExternalPhysical:
|
||||
// Marks memory object for direct GPU access to the host memory
|
||||
|
||||
@@ -1567,7 +1567,9 @@ Resource::map(VirtualGPU* gpu, uint flags, uint startLayer, uint numLayers)
|
||||
// guarantee that the address will be valid,
|
||||
// since GSL could still process the first map
|
||||
if (address_ == NULL) {
|
||||
amd::Os::sleep(10);
|
||||
for (uint i = 0; address_ == NULL && i < 10; ++i) {
|
||||
amd::Os::sleep(1);
|
||||
}
|
||||
assert((address_ != NULL) && "Multiple maps failed!");
|
||||
}
|
||||
|
||||
|
||||
@@ -3562,31 +3562,56 @@ VirtualGPU::submitTransferBufferFromFile(amd::TransferBufferFileCommand& cmd)
|
||||
{
|
||||
size_t copySize = cmd.size()[0];
|
||||
size_t fileOffset = cmd.fileOffset();
|
||||
size_t srcDstOffset = cmd.origin()[0];
|
||||
Memory* mem = dev().getGpuMemory(&cmd.memory());
|
||||
uint idx = 0;
|
||||
|
||||
assert((cmd.type() == CL_COMMAND_READ_SSG_FILE_AMD) ||
|
||||
(cmd.type() == CL_COMMAND_WRITE_SSG_FILE_AMD));
|
||||
bool writeBuffer(cmd.type() == CL_COMMAND_READ_SSG_FILE_AMD);
|
||||
(cmd.type() == CL_COMMAND_WRITE_SSG_FILE_AMD));
|
||||
const bool writeBuffer(cmd.type() == CL_COMMAND_READ_SSG_FILE_AMD);
|
||||
|
||||
while (copySize > 0) {
|
||||
Memory* staging = dev().getGpuMemory(&cmd.staging(idx));
|
||||
size_t srcDstSize = amd::TransferBufferFileCommand::StagingBufferSize;
|
||||
srcDstSize = std::min(srcDstSize, copySize);
|
||||
void* srcDstBuffer = staging->cpuMap(*this);
|
||||
if (!cmd.file()->transferBlock(writeBuffer,
|
||||
srcDstBuffer, staging->size(), fileOffset, 0, srcDstSize)) {
|
||||
cmd.setStatus(CL_INVALID_OPERATION);
|
||||
return;
|
||||
if (writeBuffer) {
|
||||
size_t dstOffset = cmd.origin()[0];
|
||||
while (copySize > 0) {
|
||||
Memory* staging = dev().getGpuMemory(&cmd.staging(idx));
|
||||
size_t dstSize = amd::TransferBufferFileCommand::StagingBufferSize;
|
||||
dstSize = std::min(dstSize, copySize);
|
||||
void* dstBuffer = staging->cpuMap(*this);
|
||||
if (!cmd.file()->transferBlock(writeBuffer,
|
||||
dstBuffer, staging->size(), fileOffset, 0, dstSize)) {
|
||||
cmd.setStatus(CL_INVALID_OPERATION);
|
||||
return;
|
||||
}
|
||||
staging->cpuUnmap(*this);
|
||||
|
||||
bool result = blitMgr().copyBuffer(*staging, *mem,
|
||||
0, dstOffset, dstSize, false);
|
||||
flushDMA(getGpuEvent(staging->gslResource())->engineId_);
|
||||
fileOffset += dstSize;
|
||||
dstOffset += dstSize;
|
||||
copySize -= dstSize;
|
||||
}
|
||||
staging->cpuUnmap(*this);
|
||||
}
|
||||
else {
|
||||
size_t srcOffset = cmd.origin()[0];
|
||||
while (copySize > 0) {
|
||||
Memory* staging = dev().getGpuMemory(&cmd.staging(idx));
|
||||
size_t srcSize = amd::TransferBufferFileCommand::StagingBufferSize;
|
||||
srcSize = std::min(srcSize, copySize);
|
||||
bool result = blitMgr().copyBuffer(*mem, *staging,
|
||||
srcOffset, 0, srcSize, false);
|
||||
|
||||
bool result = blitMgr().copyBuffer(*staging, *mem,
|
||||
fileOffset, srcDstOffset, srcDstSize, false);
|
||||
flushDMA(getGpuEvent(staging->gslResource())->engineId_);
|
||||
srcDstOffset += srcDstSize;
|
||||
copySize -= srcDstSize;
|
||||
void* srcBuffer = staging->cpuMap(*this);
|
||||
if (!cmd.file()->transferBlock(writeBuffer,
|
||||
srcBuffer, staging->size(), fileOffset, 0, srcSize)) {
|
||||
cmd.setStatus(CL_INVALID_OPERATION);
|
||||
return;
|
||||
}
|
||||
staging->cpuUnmap(*this);
|
||||
|
||||
fileOffset += srcSize;
|
||||
srcOffset += srcSize;
|
||||
copySize -= srcSize;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1654,9 +1654,11 @@ Resource::map(VirtualGPU* gpu, uint flags, uint startLayer, uint numLayers)
|
||||
|
||||
//! \note the atomic operation with counter doesn't
|
||||
// guarantee that the address will be valid,
|
||||
// since GSL could still process the first map
|
||||
// since PAL could still process the first map
|
||||
if (address_ == nullptr) {
|
||||
amd::Os::sleep(10);
|
||||
for (uint i = 0; address_ == NULL && i < 10; ++i) {
|
||||
amd::Os::sleep(1);
|
||||
}
|
||||
assert((address_ != nullptr) && "Multiple maps failed!");
|
||||
}
|
||||
|
||||
|
||||
@@ -592,7 +592,15 @@ TransferBufferFileCommand::submit(device::VirtualDevice& device)
|
||||
device::Memory* mem = memory_->getDeviceMemory(queue()->device());
|
||||
if (memory_->getMemFlags() & (CL_MEM_USE_HOST_PTR |
|
||||
CL_MEM_ALLOC_HOST_PTR | CL_MEM_USE_PERSISTENT_MEM_AMD)) {
|
||||
void* srcDstBuffer = mem->cpuMap(device);
|
||||
void* srcDstBuffer = nullptr;
|
||||
if (memory_->getMemFlags() & CL_MEM_USE_PERSISTENT_MEM_AMD) {
|
||||
// Lock protected multiple maps for persistent memory
|
||||
amd::ScopedLock lock(mem->owner()->lockMemoryOps());
|
||||
srcDstBuffer = mem->cpuMap(device);
|
||||
}
|
||||
else {
|
||||
srcDstBuffer = mem->cpuMap(device);
|
||||
}
|
||||
// Make HD transfer to the host accessible memory
|
||||
bool writeBuffer(type() == CL_COMMAND_READ_SSG_FILE_AMD);
|
||||
if (!file()->transferBlock(writeBuffer, srcDstBuffer, mem->size(),
|
||||
@@ -600,7 +608,14 @@ TransferBufferFileCommand::submit(device::VirtualDevice& device)
|
||||
setStatus(CL_INVALID_OPERATION);
|
||||
return;
|
||||
}
|
||||
mem->cpuUnmap(device);
|
||||
if (memory_->getMemFlags() & CL_MEM_USE_PERSISTENT_MEM_AMD) {
|
||||
// Lock protected multiple maps for persistent memory
|
||||
amd::ScopedLock lock(mem->owner()->lockMemoryOps());
|
||||
mem->cpuUnmap(device);
|
||||
}
|
||||
else {
|
||||
mem->cpuUnmap(device);
|
||||
}
|
||||
}
|
||||
else {
|
||||
device.submitTransferBufferFromFile(*this);
|
||||
|
||||
Yeni konuda referans
Bir kullanıcı engelle