P4 to Git Change 1323915 by gandryey@gera-w8 on 2016/10/07 12:59:30

SWDEV-104441 - [SSG] OpenCL has not implemented the asynchronous transfer
	- Use lock protection for multiple maps of persistent memory
	- Don't mark persistent memory as host memory
	- Implement file write for invisible memory

Affected files ...

... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpumemory.cpp#129 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuresource.cpp#236 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#408 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.cpp#13 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/command.cpp#77 edit


[ROCm/clr commit: 9d739c200c]
This commit is contained in:
foreman
2016-10-07 13:07:19 -04:00
parent d3a47a083b
commit 3e62df9d63
5 changed files with 67 additions and 24 deletions
-1
View File
@@ -171,7 +171,6 @@ Memory::create(
// Check if CAL created a resource
if (result) {
switch (memoryType()) {
case Resource::Persistent:
case Resource::Pinned:
case Resource::ExternalPhysical:
// Marks memory object for direct GPU access to the host memory
+3 -1
View File
@@ -1567,7 +1567,9 @@ Resource::map(VirtualGPU* gpu, uint flags, uint startLayer, uint numLayers)
// guarantee that the address will be valid,
// since GSL could still process the first map
if (address_ == NULL) {
amd::Os::sleep(10);
for (uint i = 0; address_ == NULL && i < 10; ++i) {
amd::Os::sleep(1);
}
assert((address_ != NULL) && "Multiple maps failed!");
}
+43 -18
View File
@@ -3562,31 +3562,56 @@ VirtualGPU::submitTransferBufferFromFile(amd::TransferBufferFileCommand& cmd)
{
size_t copySize = cmd.size()[0];
size_t fileOffset = cmd.fileOffset();
size_t srcDstOffset = cmd.origin()[0];
Memory* mem = dev().getGpuMemory(&cmd.memory());
uint idx = 0;
assert((cmd.type() == CL_COMMAND_READ_SSG_FILE_AMD) ||
(cmd.type() == CL_COMMAND_WRITE_SSG_FILE_AMD));
bool writeBuffer(cmd.type() == CL_COMMAND_READ_SSG_FILE_AMD);
(cmd.type() == CL_COMMAND_WRITE_SSG_FILE_AMD));
const bool writeBuffer(cmd.type() == CL_COMMAND_READ_SSG_FILE_AMD);
while (copySize > 0) {
Memory* staging = dev().getGpuMemory(&cmd.staging(idx));
size_t srcDstSize = amd::TransferBufferFileCommand::StagingBufferSize;
srcDstSize = std::min(srcDstSize, copySize);
void* srcDstBuffer = staging->cpuMap(*this);
if (!cmd.file()->transferBlock(writeBuffer,
srcDstBuffer, staging->size(), fileOffset, 0, srcDstSize)) {
cmd.setStatus(CL_INVALID_OPERATION);
return;
if (writeBuffer) {
size_t dstOffset = cmd.origin()[0];
while (copySize > 0) {
Memory* staging = dev().getGpuMemory(&cmd.staging(idx));
size_t dstSize = amd::TransferBufferFileCommand::StagingBufferSize;
dstSize = std::min(dstSize, copySize);
void* dstBuffer = staging->cpuMap(*this);
if (!cmd.file()->transferBlock(writeBuffer,
dstBuffer, staging->size(), fileOffset, 0, dstSize)) {
cmd.setStatus(CL_INVALID_OPERATION);
return;
}
staging->cpuUnmap(*this);
bool result = blitMgr().copyBuffer(*staging, *mem,
0, dstOffset, dstSize, false);
flushDMA(getGpuEvent(staging->gslResource())->engineId_);
fileOffset += dstSize;
dstOffset += dstSize;
copySize -= dstSize;
}
staging->cpuUnmap(*this);
}
else {
size_t srcOffset = cmd.origin()[0];
while (copySize > 0) {
Memory* staging = dev().getGpuMemory(&cmd.staging(idx));
size_t srcSize = amd::TransferBufferFileCommand::StagingBufferSize;
srcSize = std::min(srcSize, copySize);
bool result = blitMgr().copyBuffer(*mem, *staging,
srcOffset, 0, srcSize, false);
bool result = blitMgr().copyBuffer(*staging, *mem,
fileOffset, srcDstOffset, srcDstSize, false);
flushDMA(getGpuEvent(staging->gslResource())->engineId_);
srcDstOffset += srcDstSize;
copySize -= srcDstSize;
void* srcBuffer = staging->cpuMap(*this);
if (!cmd.file()->transferBlock(writeBuffer,
srcBuffer, staging->size(), fileOffset, 0, srcSize)) {
cmd.setStatus(CL_INVALID_OPERATION);
return;
}
staging->cpuUnmap(*this);
fileOffset += srcSize;
srcOffset += srcSize;
copySize -= srcSize;
}
}
}
+4 -2
View File
@@ -1654,9 +1654,11 @@ Resource::map(VirtualGPU* gpu, uint flags, uint startLayer, uint numLayers)
//! \note the atomic operation with counter doesn't
// guarantee that the address will be valid,
// since GSL could still process the first map
// since PAL could still process the first map
if (address_ == nullptr) {
amd::Os::sleep(10);
for (uint i = 0; address_ == NULL && i < 10; ++i) {
amd::Os::sleep(1);
}
assert((address_ != nullptr) && "Multiple maps failed!");
}
+17 -2
View File
@@ -592,7 +592,15 @@ TransferBufferFileCommand::submit(device::VirtualDevice& device)
device::Memory* mem = memory_->getDeviceMemory(queue()->device());
if (memory_->getMemFlags() & (CL_MEM_USE_HOST_PTR |
CL_MEM_ALLOC_HOST_PTR | CL_MEM_USE_PERSISTENT_MEM_AMD)) {
void* srcDstBuffer = mem->cpuMap(device);
void* srcDstBuffer = nullptr;
if (memory_->getMemFlags() & CL_MEM_USE_PERSISTENT_MEM_AMD) {
// Lock protected multiple maps for persistent memory
amd::ScopedLock lock(mem->owner()->lockMemoryOps());
srcDstBuffer = mem->cpuMap(device);
}
else {
srcDstBuffer = mem->cpuMap(device);
}
// Make HD transfer to the host accessible memory
bool writeBuffer(type() == CL_COMMAND_READ_SSG_FILE_AMD);
if (!file()->transferBlock(writeBuffer, srcDstBuffer, mem->size(),
@@ -600,7 +608,14 @@ TransferBufferFileCommand::submit(device::VirtualDevice& device)
setStatus(CL_INVALID_OPERATION);
return;
}
mem->cpuUnmap(device);
if (memory_->getMemFlags() & CL_MEM_USE_PERSISTENT_MEM_AMD) {
// Lock protected multiple maps for persistent memory
amd::ScopedLock lock(mem->owner()->lockMemoryOps());
mem->cpuUnmap(device);
}
else {
mem->cpuUnmap(device);
}
}
else {
device.submitTransferBufferFromFile(*this);