P4 to Git Change 1101352 by gandryey@gera-dev-w7 on 2014/11/28 18:03:18

ECR #304775 - Optimize read maps of USWC memory
	- If the runtime detects a map of USWC memory that includes a read operation, it switches to an indirect map. This should improve map-read performance on APUs when USWC memory is used instead of the frame buffer.

Affected files ...

... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_memobj.cpp#72 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_svm.cpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpudevice.cpp#269 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpudevice.hpp#89 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#172 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#234 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#486 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.hpp#134 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpumemory.cpp#112 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpumemory.hpp#43 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#340 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsadevice.cpp#88 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsadevice.hpp#45 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsamemory.cpp#42 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsamemory.hpp#27 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsavirtual.cpp#98 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsadevice.cpp#21 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsadevice.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsamemory.cpp#6 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsamemory.hpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsavirtual.cpp#26 edit


[ROCm/clr commit: 6d464be252]
Esse commit está contido em:
foreman
2014-11-28 18:11:36 -05:00
commit f43897e51e
14 arquivos alterados com 75 adições e 65 exclusões
@@ -1123,6 +1123,7 @@ Device::allocMapTarget(
amd::Memory& mem,
const amd::Coord3D& origin,
const amd::Coord3D& region,
uint mapFlags,
size_t* rowPitch,
size_t* slicePitch)
{
@@ -143,6 +143,7 @@ public:
amd::Memory& mem, //!< Abstraction layer memory object
const amd::Coord3D& origin, //!< The map location in memory
const amd::Coord3D& region, //!< The map region in memory
uint mapFlags, //!< Map flags
size_t* rowPitch = NULL, //!< Row pitch for the mapped memory
size_t* slicePitch = NULL //!< Slice for the mapped memory
);
+11 -5
Ver Arquivo
@@ -572,15 +572,21 @@ Kernel::~Kernel()
}
void
Memory::saveWriteMapInfo(
Memory::saveMapInfo(
const amd::Coord3D origin,
const amd::Coord3D region,
uint mapFlags,
bool entire)
{
writeMapInfo_.origin_ = origin;
writeMapInfo_.region_ = region;
writeMapInfo_.entire_ = entire;
flags_ |= UnmapWrite;
if (mapFlags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION)) {
writeMapInfo_.origin_ = origin;
writeMapInfo_.region_ = region;
writeMapInfo_.entire_ = entire;
flags_ |= UnmapWrite;
}
if (mapFlags & CL_MAP_READ) {
flags_ |= UnmapRead;
}
}
Program::Program(amd::Device& device)
+10 -4
Ver Arquivo
@@ -727,20 +727,24 @@ public:
//! Saves map info for this object
//! @note: It's not a thread safe operation, the app must implement
//! synchronization for the multiple write maps if necessary
void saveWriteMapInfo(
void saveMapInfo(
const amd::Coord3D origin, //!< Origin of the map location
const amd::Coord3D region, //!< Mapped region
uint mapFlags, //!< Map flags
bool entire //!< True if the entire memory was mapped
);
const WriteMapInfo* writeMapInfo() const { return &writeMapInfo_; }
//! Clear the unmap state bit(s) recorded in flags_ for this memory object
void clearUnmapWrite() { flags_ &= ~UnmapWrite; }
void clearUnmapFlags() { flags_ &= ~(UnmapWrite | UnmapRead); }
//! Returns state of map read only flag
//! Returns state of map write flag
bool isUnmapWrite() const { return (flags_ & UnmapWrite) ? true : false; }
//! Returns state of map read flag
bool isUnmapRead() const { return (flags_ & UnmapRead) ? true : false; }
//! Returns state of memory direct access flag
bool isHostMemDirectAccess() const
{ return (flags_ & HostMemoryDirectAccess) ? true : false; }
@@ -754,9 +758,10 @@ protected:
HostMemoryDirectAccess = 0x00000001, //!< GPU has direct access to the host memory
MapResourceAlloced = 0x00000002, //!< Map resource was allocated
PinnedMemoryAlloced = 0x00000004, //!< An extra pinned resource was allocated
UnmapWrite = 0x00000008, //!< Memory was mapped read-only
UnmapWrite = 0x00000008, //!< Memory was mapped for write
SubMemoryObject = 0x00000010, //!< Memory is sub-memory
HostMemoryRegistered = 0x00000020, //!< Host memory was registered
UnmapRead = 0x00000040, //!< Memory was mapped for read
};
uint flags_; //!< Memory object flags
@@ -1587,6 +1592,7 @@ public:
amd::Memory& mem, //!< Abstraction layer memory object
const amd::Coord3D& origin, //!< The map location in memory
const amd::Coord3D& region, //!< The map region in memory
uint mapFlags, //!< Map flags
size_t* rowPitch = NULL, //!< Row pitch for the mapped memory
size_t* slicePitch = NULL //!< Slice for the mapped memory
) = 0;
@@ -2098,6 +2098,7 @@ Device::allocMapTarget(
amd::Memory& mem,
const amd::Coord3D& origin,
const amd::Coord3D& region,
uint mapFlags,
size_t* rowPitch,
size_t* slicePitch)
{
@@ -2109,7 +2110,7 @@ Device::allocMapTarget(
}
// Pass request over to memory
return memory->allocMapTarget(origin, region, rowPitch, slicePitch);
return memory->allocMapTarget(origin, region, mapFlags, rowPitch, slicePitch);
}
bool
@@ -111,6 +111,7 @@ public:
amd::Memory& mem, //!< Abstraction layer memory object
const amd::Coord3D& origin, //!< The map location in memory
const amd::Coord3D& region, //!< The map region in memory
uint mapFlags, //!< Map flags
size_t* rowPitch = NULL, //!< Row pitch for the mapped memory
size_t* slicePitch = NULL //!< Slice for the mapped memory
) { return NULL; }
@@ -437,6 +438,7 @@ public:
amd::Memory& mem, //!< Abstraction layer memory object
const amd::Coord3D& origin, //!< The map location in memory
const amd::Coord3D& region, //!< The map region in memory
uint mapFlags, //!< Map flags
size_t* rowPitch = NULL, //!< Row pitch for the mapped memory
size_t* slicePitch = NULL //!< Slice for the mapped memory
);
@@ -906,6 +906,7 @@ void*
Memory::allocMapTarget(
const amd::Coord3D& origin,
const amd::Coord3D& region,
uint mapFlags,
size_t* rowPitch,
size_t* slicePitch)
{
@@ -921,7 +922,8 @@ Memory::allocMapTarget(
incIndMapCount();
// If host memory exists, use it
if (owner()->getHostMem() != NULL) {
if ((owner()->getHostMem() != NULL) &&
(isCacheable() || !isHostMemDirectAccess() || !(mapFlags & CL_MAP_READ))) {
mapAddress = reinterpret_cast<address>(owner()->getHostMem());
}
// If resource is a persistent allocation, we can use it directly
@@ -1226,6 +1228,7 @@ void*
Image::allocMapTarget(
const amd::Coord3D& origin,
const amd::Coord3D& region,
uint mapFlags,
size_t* rowPitch,
size_t* slicePitch)
{
@@ -1245,7 +1248,8 @@ Image::allocMapTarget(
incIndMapCount();
// If host memory exists, use it
if (owner()->getHostMem() != NULL) {
if ((owner()->getHostMem() != NULL) &&
(isCacheable() || !isHostMemDirectAccess() || !(mapFlags & CL_MAP_READ))) {
useRemoteResource = false;
mapAddress = reinterpret_cast<address>(owner()->getHostMem());
amd::Image* amdImage = owner()->asImage();
@@ -121,6 +121,7 @@ public:
virtual void* allocMapTarget(
const amd::Coord3D& origin, //!< The map location in memory
const amd::Coord3D& region, //!< The map region in memory
uint mapFlags, //!< Map flags
size_t* rowPitch = NULL, //!< Row pitch for the mapped memory
size_t* slicePitch = NULL //!< Slice for the mapped memory
);
@@ -288,6 +289,7 @@ public:
virtual void* allocMapTarget(
const amd::Coord3D& origin, //!< The map location in memory
const amd::Coord3D& region, //!< The map region in memory
uint mapFlags, //!< Map flags
size_t* rowPitch = NULL, //!< Row pitch for the mapped memory
size_t* slicePitch = NULL //!< Slice for the mapped memory
);
@@ -1085,14 +1085,15 @@ VirtualGPU::submitMapMemory(amd::MapMemoryCommand& vcmd)
gpu::Memory* memory = dev().getGpuMemory(&vcmd.memory());
// Save write map info for unmap copy
if (vcmd.mapFlags() & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION)) {
memory->saveWriteMapInfo(vcmd.origin(),
vcmd.size(), vcmd.isEntireMemory());
}
// Save map info for unmap operation
memory->saveMapInfo(vcmd.origin(), vcmd.size(),
vcmd.mapFlags(), vcmd.isEntireMemory());
// If we have host memory, use it
if (memory->owner()->getHostMem() != NULL) {
if ((memory->owner()->getHostMem() != NULL) &&
(memory->isCacheable() ||
!memory->isHostMemDirectAccess() ||
!(vcmd.mapFlags() & CL_MAP_READ))) {
if (!memory->isHostMemDirectAccess()) {
// Make sure GPU finished operation before
// synchronization with the backing store
@@ -1177,7 +1178,10 @@ VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& vcmd)
amd::Memory* owner = memory->owner();
// We used host memory
if (owner->getHostMem() != NULL) {
if ((owner->getHostMem() != NULL) &&
(memory->isCacheable() ||
!memory->isHostMemDirectAccess() ||
!memory->isUnmapRead())) {
if (memory->isUnmapWrite()) {
// Target is the backing store, so sync
owner->signalWrite(NULL);
@@ -1254,8 +1258,8 @@ VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& vcmd)
vcmd.setStatus(CL_INVALID_VALUE);
}
// Clear read only flag
memory->clearUnmapWrite();
// Clear unmap flags
memory->clearUnmapFlags();
profilingEnd(vcmd);
}
@@ -1357,31 +1361,20 @@ VirtualGPU::submitSvmMapMemory(amd::SvmMapMemoryCommand& vcmd)
profilingBegin(vcmd, true);
//check if the ptr is in the svm space
amd::Memory* svmMem = vcmd.getSvmMem();
if (NULL == svmMem) {
LogWarning("wrong svm address ");
vcmd.setStatus(CL_INVALID_VALUE);
return;
}
// Make sure we have memory for the command execution
gpu::Memory* memory = dev().getGpuMemory(svmMem);
gpu::Memory* memory = dev().getGpuMemory(vcmd.getSvmMem());
if (vcmd.mapFlags() & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION)) {
memory->saveWriteMapInfo(vcmd.origin(), vcmd.size(), vcmd.isEntireMemory());
}
memory->saveMapInfo(vcmd.origin(), vcmd.size(),
vcmd.mapFlags(), vcmd.isEntireMemory());
if (memory->mapMemory() != NULL) {
if (vcmd.mapFlags() & (CL_MAP_READ | CL_MAP_WRITE)) {
amd::Coord3D dstOrigin(0, 0, 0);
if (memory->cal()->buffer_) {
if (!blitMgr().copyBuffer(*memory,
*memory->mapMemory(), vcmd.origin(), dstOrigin,
vcmd.size(), vcmd.isEntireMemory())) {
LogError("submitSVMMapMemory() - copy failed");
vcmd.setStatus(CL_MAP_FAILURE);
}
assert(memory->cal()->buffer_ && "SVM memory can't be an image");
if (!blitMgr().copyBuffer(*memory, *memory->mapMemory(),
vcmd.origin(), dstOrigin, vcmd.size(), vcmd.isEntireMemory())) {
LogError("submitSVMMapMemory() - copy failed");
vcmd.setStatus(CL_MAP_FAILURE);
}
}
}
@@ -1399,30 +1392,18 @@ VirtualGPU::submitSvmUnmapMemory(amd::SvmUnmapMemoryCommand& vcmd)
amd::ScopedLock lock(execution());
profilingBegin(vcmd, true);
amd::Memory* svmMem = vcmd.getSvmMem();
if (NULL == svmMem) {
LogWarning("wrong svm address ");
vcmd.setStatus(CL_INVALID_VALUE);
return;
}
gpu::Memory* memory = dev().getGpuMemory(svmMem);
gpu::Memory* memory = dev().getGpuMemory(vcmd.getSvmMem());
if (memory->mapMemory() != NULL) {
if (memory->isUnmapWrite()) {
amd::Coord3D srcOrigin(0, 0, 0);
// Target is a remote resource, so copy
assert(memory->mapMemory() != NULL);
if (memory->cal()->buffer_) {
if (!blitMgr().copyBuffer(
*memory->mapMemory(), *memory,
srcOrigin,
memory->writeMapInfo()->origin_,
memory->writeMapInfo()->region_,
memory->writeMapInfo()->entire_)) {
LogError("submitUnmapMemory() - copy failed");
vcmd.setStatus(CL_OUT_OF_RESOURCES);
}
assert(memory->cal()->buffer_ && "SVM memory can't be an image");
if (!blitMgr().copyBuffer(*memory->mapMemory(), *memory, srcOrigin,
memory->writeMapInfo()->origin_, memory->writeMapInfo()->region_,
memory->writeMapInfo()->entire_)) {
LogError("submitSvmUnmapMemory() - copy failed");
vcmd.setStatus(CL_OUT_OF_RESOURCES);
}
}
}
@@ -685,6 +685,7 @@ Device::allocMapTarget(
amd::Memory& mem,
const amd::Coord3D& origin,
const amd::Coord3D& region,
uint mapFlags,
size_t* rowPitch,
size_t* slicePitch)
{
@@ -696,7 +697,7 @@ Device::allocMapTarget(
}
// Pass request over to memory
return memory->allocMapTarget(origin, region, rowPitch, slicePitch);
return memory->allocMapTarget(origin, region, mapFlags, rowPitch, slicePitch);
}
bool
@@ -191,6 +191,7 @@ public:
amd::Memory& mem, //!< Abstraction layer memory object
const amd::Coord3D& origin, //!< The map location in memory
const amd::Coord3D& region, //!< The map region in memory
uint mapFlags, //!< Map flags
size_t* rowPitch = NULL, //!< Row pitch for the mapped memory
size_t* slicePitch = NULL //!< Slice for the mapped memory
) {
@@ -323,6 +324,7 @@ public:
amd::Memory &mem, //!< Abstraction layer memory object
const amd::Coord3D &origin, //!< The map location in memory
const amd::Coord3D &region, //!< The map region in memory
uint mapFlags, //!< Map flags
size_t *rowPitch = NULL, //!< Row pitch for the mapped memory
size_t *slicePitch = NULL //!< Slice for the mapped memory
);
@@ -79,6 +79,7 @@ Memory::freeMapMemory()
void *
Memory::allocMapTarget(const amd::Coord3D &origin,
const amd::Coord3D &region,
uint mapFlags,
size_t *rowPitch,
size_t *slicePitch)
{
@@ -146,7 +147,7 @@ Memory::cpuMap(
{
// Create the map target.
void * mapTarget =
allocMapTarget(amd::Coord3D(0), amd::Coord3D(0), rowPitch, slicePitch);
allocMapTarget(amd::Coord3D(0), amd::Coord3D(0), 0, rowPitch, slicePitch);
// Sync to map target if no direct access.
if (!isHostMemDirectAccess()) {
@@ -862,6 +863,7 @@ Image::createView(Image &parent)
void* Image::allocMapTarget(const amd::Coord3D& origin,
const amd::Coord3D& region,
uint mapFlags,
size_t* rowPitch,
size_t* slicePitch)
{
@@ -35,6 +35,7 @@ class Memory : public device::Memory {
// of an indirect map for a given memory object
virtual void *allocMapTarget(const amd::Coord3D &origin,
const amd::Coord3D &region,
uint mapFlags,
size_t *rowPitch,
size_t *slicePitch);
@@ -168,6 +169,7 @@ public:
//! of an indirect map for a given memory object
virtual void* allocMapTarget(const amd::Coord3D& origin,
const amd::Coord3D& region,
uint mapFlags,
size_t* rowPitch,
size_t* slicePitch);
@@ -589,9 +589,8 @@ void VirtualGPU::submitMapMemory(amd::MapMemoryCommand &cmd)
// Save map write requirement.
if (mapFlag & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION)) {
devMemory->saveWriteMapInfo(cmd.origin(),
cmd.size(),
cmd.isEntireMemory());
devMemory->saveMapInfo(cmd.origin(), cmd.size(),
mapFlag, cmd.isEntireMemory());
}
// Sync to the map target.
@@ -686,7 +685,7 @@ void VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand &cmd)
}
}
devMemory->clearUnmapWrite();
devMemory->clearUnmapFlags();
cmd.memory().signalWrite(&dev());
}