P4 to Git Change 1101352 by gandryey@gera-dev-w7 on 2014/11/28 18:03:18
ECR #304775 - Make optimization for read map of USWC memory
- If the runtime detects a map of USWC memory with a read operation, it will switch to an indirect map. This should improve map-read performance on APU(s) when USWC memory is used instead of the frame buffer.
Affected files ...
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_memobj.cpp#72 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_svm.cpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpudevice.cpp#269 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpudevice.hpp#89 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#172 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#234 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#486 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.hpp#134 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpumemory.cpp#112 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpumemory.hpp#43 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#340 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsadevice.cpp#88 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsadevice.hpp#45 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsamemory.cpp#42 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsamemory.hpp#27 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsavirtual.cpp#98 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsadevice.cpp#21 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsadevice.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsamemory.cpp#6 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsamemory.hpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsavirtual.cpp#26 edit
[ROCm/clr commit: 6d464be252]
Esse commit está contido em:
@@ -1123,6 +1123,7 @@ Device::allocMapTarget(
|
||||
amd::Memory& mem,
|
||||
const amd::Coord3D& origin,
|
||||
const amd::Coord3D& region,
|
||||
uint mapFlags,
|
||||
size_t* rowPitch,
|
||||
size_t* slicePitch)
|
||||
{
|
||||
|
||||
@@ -143,6 +143,7 @@ public:
|
||||
amd::Memory& mem, //!< Abstraction layer memory object
|
||||
const amd::Coord3D& origin, //!< The map location in memory
|
||||
const amd::Coord3D& region, //!< The map region in memory
|
||||
uint mapFlags, //!< Map flags
|
||||
size_t* rowPitch = NULL, //!< Row pitch for the mapped memory
|
||||
size_t* slicePitch = NULL //!< Slice for the mapped memory
|
||||
);
|
||||
|
||||
@@ -572,15 +572,21 @@ Kernel::~Kernel()
|
||||
}
|
||||
|
||||
void
|
||||
Memory::saveWriteMapInfo(
|
||||
Memory::saveMapInfo(
|
||||
const amd::Coord3D origin,
|
||||
const amd::Coord3D region,
|
||||
uint mapFlags,
|
||||
bool entire)
|
||||
{
|
||||
writeMapInfo_.origin_ = origin;
|
||||
writeMapInfo_.region_ = region;
|
||||
writeMapInfo_.entire_ = entire;
|
||||
flags_ |= UnmapWrite;
|
||||
if (mapFlags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION)) {
|
||||
writeMapInfo_.origin_ = origin;
|
||||
writeMapInfo_.region_ = region;
|
||||
writeMapInfo_.entire_ = entire;
|
||||
flags_ |= UnmapWrite;
|
||||
}
|
||||
if (mapFlags & CL_MAP_READ) {
|
||||
flags_ |= UnmapRead;
|
||||
}
|
||||
}
|
||||
|
||||
Program::Program(amd::Device& device)
|
||||
|
||||
@@ -727,20 +727,24 @@ public:
|
||||
//! Saves map info for this object
|
||||
//! @note: It's not a thread safe operation, the app must implement
|
||||
//! synchronization for the multiple write maps if necessary
|
||||
void saveWriteMapInfo(
|
||||
void saveMapInfo(
|
||||
const amd::Coord3D origin, //!< Origin of the map location
|
||||
const amd::Coord3D region, //!< Mapped region
|
||||
uint mapFlags, //!< Map flags
|
||||
bool entire //!< True if the entire memory was mapped
|
||||
);
|
||||
|
||||
const WriteMapInfo* writeMapInfo() const { return &writeMapInfo_; }
|
||||
|
||||
//! Clear memory object as mapped read only
|
||||
void clearUnmapWrite() { flags_ &= ~UnmapWrite; }
|
||||
void clearUnmapFlags() { flags_ &= ~(UnmapWrite | UnmapRead); }
|
||||
|
||||
//! Returns state of map read only flag
|
||||
//! Returns state of map write flag
|
||||
bool isUnmapWrite() const { return (flags_ & UnmapWrite) ? true : false; }
|
||||
|
||||
//! Returns state of map read flag
|
||||
bool isUnmapRead() const { return (flags_ & UnmapRead) ? true : false; }
|
||||
|
||||
//! Returns state of memory direct access flag
|
||||
bool isHostMemDirectAccess() const
|
||||
{ return (flags_ & HostMemoryDirectAccess) ? true : false; }
|
||||
@@ -754,9 +758,10 @@ protected:
|
||||
HostMemoryDirectAccess = 0x00000001, //!< GPU has direct access to the host memory
|
||||
MapResourceAlloced = 0x00000002, //!< Map resource was allocated
|
||||
PinnedMemoryAlloced = 0x00000004, //!< An extra pinned resource was allocated
|
||||
UnmapWrite = 0x00000008, //!< Memory was mapped read-only
|
||||
UnmapWrite = 0x00000008, //!< Memory was mapped for write
|
||||
SubMemoryObject = 0x00000010, //!< Memory is sub-memory
|
||||
HostMemoryRegistered = 0x00000020, //!< Host memory was registered
|
||||
UnmapRead = 0x00000040, //!< Memory was mapped for read
|
||||
};
|
||||
uint flags_; //!< Memory object flags
|
||||
|
||||
@@ -1587,6 +1592,7 @@ public:
|
||||
amd::Memory& mem, //!< Abstraction layer memory object
|
||||
const amd::Coord3D& origin, //!< The map location in memory
|
||||
const amd::Coord3D& region, //!< The map region in memory
|
||||
uint mapFlags, //!< Map flags
|
||||
size_t* rowPitch = NULL, //!< Row pitch for the mapped memory
|
||||
size_t* slicePitch = NULL //!< Slice for the mapped memory
|
||||
) = 0;
|
||||
|
||||
@@ -2098,6 +2098,7 @@ Device::allocMapTarget(
|
||||
amd::Memory& mem,
|
||||
const amd::Coord3D& origin,
|
||||
const amd::Coord3D& region,
|
||||
uint mapFlags,
|
||||
size_t* rowPitch,
|
||||
size_t* slicePitch)
|
||||
{
|
||||
@@ -2109,7 +2110,7 @@ Device::allocMapTarget(
|
||||
}
|
||||
|
||||
// Pass request over to memory
|
||||
return memory->allocMapTarget(origin, region, rowPitch, slicePitch);
|
||||
return memory->allocMapTarget(origin, region, mapFlags, rowPitch, slicePitch);
|
||||
}
|
||||
|
||||
bool
|
||||
|
||||
@@ -111,6 +111,7 @@ public:
|
||||
amd::Memory& mem, //!< Abstraction layer memory object
|
||||
const amd::Coord3D& origin, //!< The map location in memory
|
||||
const amd::Coord3D& region, //!< The map region in memory
|
||||
uint mapFlags, //!< Map flags
|
||||
size_t* rowPitch = NULL, //!< Row pitch for the mapped memory
|
||||
size_t* slicePitch = NULL //!< Slice for the mapped memory
|
||||
) { return NULL; }
|
||||
@@ -437,6 +438,7 @@ public:
|
||||
amd::Memory& mem, //!< Abstraction layer memory object
|
||||
const amd::Coord3D& origin, //!< The map location in memory
|
||||
const amd::Coord3D& region, //!< The map region in memory
|
||||
uint mapFlags, //!< Map flags
|
||||
size_t* rowPitch = NULL, //!< Row pitch for the mapped memory
|
||||
size_t* slicePitch = NULL //!< Slice for the mapped memory
|
||||
);
|
||||
|
||||
@@ -906,6 +906,7 @@ void*
|
||||
Memory::allocMapTarget(
|
||||
const amd::Coord3D& origin,
|
||||
const amd::Coord3D& region,
|
||||
uint mapFlags,
|
||||
size_t* rowPitch,
|
||||
size_t* slicePitch)
|
||||
{
|
||||
@@ -921,7 +922,8 @@ Memory::allocMapTarget(
|
||||
incIndMapCount();
|
||||
|
||||
// If host memory exists, use it
|
||||
if (owner()->getHostMem() != NULL) {
|
||||
if ((owner()->getHostMem() != NULL) &&
|
||||
(isCacheable() || !isHostMemDirectAccess() || !(mapFlags & CL_MAP_READ))) {
|
||||
mapAddress = reinterpret_cast<address>(owner()->getHostMem());
|
||||
}
|
||||
// If resource is a persistent allocation, we can use it directly
|
||||
@@ -1226,6 +1228,7 @@ void*
|
||||
Image::allocMapTarget(
|
||||
const amd::Coord3D& origin,
|
||||
const amd::Coord3D& region,
|
||||
uint mapFlags,
|
||||
size_t* rowPitch,
|
||||
size_t* slicePitch)
|
||||
{
|
||||
@@ -1245,7 +1248,8 @@ Image::allocMapTarget(
|
||||
incIndMapCount();
|
||||
|
||||
// If host memory exists, use it
|
||||
if (owner()->getHostMem() != NULL) {
|
||||
if ((owner()->getHostMem() != NULL) &&
|
||||
(isCacheable() || !isHostMemDirectAccess() || !(mapFlags & CL_MAP_READ))) {
|
||||
useRemoteResource = false;
|
||||
mapAddress = reinterpret_cast<address>(owner()->getHostMem());
|
||||
amd::Image* amdImage = owner()->asImage();
|
||||
|
||||
@@ -121,6 +121,7 @@ public:
|
||||
virtual void* allocMapTarget(
|
||||
const amd::Coord3D& origin, //!< The map location in memory
|
||||
const amd::Coord3D& region, //!< The map region in memory
|
||||
uint mapFlags, //!< Map flags
|
||||
size_t* rowPitch = NULL, //!< Row pitch for the mapped memory
|
||||
size_t* slicePitch = NULL //!< Slice for the mapped memory
|
||||
);
|
||||
@@ -288,6 +289,7 @@ public:
|
||||
virtual void* allocMapTarget(
|
||||
const amd::Coord3D& origin, //!< The map location in memory
|
||||
const amd::Coord3D& region, //!< The map region in memory
|
||||
uint mapFlags, //!< Map flags
|
||||
size_t* rowPitch = NULL, //!< Row pitch for the mapped memory
|
||||
size_t* slicePitch = NULL //!< Slice for the mapped memory
|
||||
);
|
||||
|
||||
@@ -1085,14 +1085,15 @@ VirtualGPU::submitMapMemory(amd::MapMemoryCommand& vcmd)
|
||||
|
||||
gpu::Memory* memory = dev().getGpuMemory(&vcmd.memory());
|
||||
|
||||
// Save write map info for unmap copy
|
||||
if (vcmd.mapFlags() & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION)) {
|
||||
memory->saveWriteMapInfo(vcmd.origin(),
|
||||
vcmd.size(), vcmd.isEntireMemory());
|
||||
}
|
||||
// Save map info for unmap operation
|
||||
memory->saveMapInfo(vcmd.origin(), vcmd.size(),
|
||||
vcmd.mapFlags(), vcmd.isEntireMemory());
|
||||
|
||||
// If we have host memory, use it
|
||||
if (memory->owner()->getHostMem() != NULL) {
|
||||
if ((memory->owner()->getHostMem() != NULL) &&
|
||||
(memory->isCacheable() ||
|
||||
!memory->isHostMemDirectAccess() ||
|
||||
!(vcmd.mapFlags() & CL_MAP_READ))) {
|
||||
if (!memory->isHostMemDirectAccess()) {
|
||||
// Make sure GPU finished operation before
|
||||
// synchronization with the backing store
|
||||
@@ -1177,7 +1178,10 @@ VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& vcmd)
|
||||
amd::Memory* owner = memory->owner();
|
||||
|
||||
// We used host memory
|
||||
if (owner->getHostMem() != NULL) {
|
||||
if ((owner->getHostMem() != NULL) &&
|
||||
(memory->isCacheable() ||
|
||||
!memory->isHostMemDirectAccess() ||
|
||||
!memory->isUnmapRead())) {
|
||||
if (memory->isUnmapWrite()) {
|
||||
// Target is the backing store, so sync
|
||||
owner->signalWrite(NULL);
|
||||
@@ -1254,8 +1258,8 @@ VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& vcmd)
|
||||
vcmd.setStatus(CL_INVALID_VALUE);
|
||||
}
|
||||
|
||||
// Clear read only flag
|
||||
memory->clearUnmapWrite();
|
||||
// Clear unmap flags
|
||||
memory->clearUnmapFlags();
|
||||
|
||||
profilingEnd(vcmd);
|
||||
}
|
||||
@@ -1357,31 +1361,20 @@ VirtualGPU::submitSvmMapMemory(amd::SvmMapMemoryCommand& vcmd)
|
||||
|
||||
profilingBegin(vcmd, true);
|
||||
|
||||
//check if the ptr is in the svm space
|
||||
amd::Memory* svmMem = vcmd.getSvmMem();
|
||||
if (NULL == svmMem) {
|
||||
LogWarning("wrong svm address ");
|
||||
vcmd.setStatus(CL_INVALID_VALUE);
|
||||
return;
|
||||
}
|
||||
|
||||
// Make sure we have memory for the command execution
|
||||
gpu::Memory* memory = dev().getGpuMemory(svmMem);
|
||||
gpu::Memory* memory = dev().getGpuMemory(vcmd.getSvmMem());
|
||||
|
||||
if (vcmd.mapFlags() & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION)) {
|
||||
memory->saveWriteMapInfo(vcmd.origin(), vcmd.size(), vcmd.isEntireMemory());
|
||||
}
|
||||
memory->saveMapInfo(vcmd.origin(), vcmd.size(),
|
||||
vcmd.mapFlags(), vcmd.isEntireMemory());
|
||||
|
||||
if (memory->mapMemory() != NULL) {
|
||||
if (vcmd.mapFlags() & (CL_MAP_READ | CL_MAP_WRITE)) {
|
||||
amd::Coord3D dstOrigin(0, 0, 0);
|
||||
if (memory->cal()->buffer_) {
|
||||
if (!blitMgr().copyBuffer(*memory,
|
||||
*memory->mapMemory(), vcmd.origin(), dstOrigin,
|
||||
vcmd.size(), vcmd.isEntireMemory())) {
|
||||
LogError("submitSVMMapMemory() - copy failed");
|
||||
vcmd.setStatus(CL_MAP_FAILURE);
|
||||
}
|
||||
assert(memory->cal()->buffer_ && "SVM memory can't be an image");
|
||||
if (!blitMgr().copyBuffer(*memory, *memory->mapMemory(),
|
||||
vcmd.origin(), dstOrigin, vcmd.size(), vcmd.isEntireMemory())) {
|
||||
LogError("submitSVMMapMemory() - copy failed");
|
||||
vcmd.setStatus(CL_MAP_FAILURE);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1399,30 +1392,18 @@ VirtualGPU::submitSvmUnmapMemory(amd::SvmUnmapMemoryCommand& vcmd)
|
||||
amd::ScopedLock lock(execution());
|
||||
profilingBegin(vcmd, true);
|
||||
|
||||
amd::Memory* svmMem = vcmd.getSvmMem();
|
||||
if (NULL == svmMem) {
|
||||
LogWarning("wrong svm address ");
|
||||
vcmd.setStatus(CL_INVALID_VALUE);
|
||||
return;
|
||||
}
|
||||
|
||||
gpu::Memory* memory = dev().getGpuMemory(svmMem);
|
||||
gpu::Memory* memory = dev().getGpuMemory(vcmd.getSvmMem());
|
||||
|
||||
if (memory->mapMemory() != NULL) {
|
||||
if (memory->isUnmapWrite()) {
|
||||
amd::Coord3D srcOrigin(0, 0, 0);
|
||||
// Target is a remote resource, so copy
|
||||
assert(memory->mapMemory() != NULL);
|
||||
if (memory->cal()->buffer_) {
|
||||
if (!blitMgr().copyBuffer(
|
||||
*memory->mapMemory(), *memory,
|
||||
srcOrigin,
|
||||
memory->writeMapInfo()->origin_,
|
||||
memory->writeMapInfo()->region_,
|
||||
memory->writeMapInfo()->entire_)) {
|
||||
LogError("submitUnmapMemory() - copy failed");
|
||||
vcmd.setStatus(CL_OUT_OF_RESOURCES);
|
||||
}
|
||||
assert(memory->cal()->buffer_ && "SVM memory can't be an image");
|
||||
if (!blitMgr().copyBuffer(*memory->mapMemory(), *memory, srcOrigin,
|
||||
memory->writeMapInfo()->origin_, memory->writeMapInfo()->region_,
|
||||
memory->writeMapInfo()->entire_)) {
|
||||
LogError("submitSvmUnmapMemory() - copy failed");
|
||||
vcmd.setStatus(CL_OUT_OF_RESOURCES);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -685,6 +685,7 @@ Device::allocMapTarget(
|
||||
amd::Memory& mem,
|
||||
const amd::Coord3D& origin,
|
||||
const amd::Coord3D& region,
|
||||
uint mapFlags,
|
||||
size_t* rowPitch,
|
||||
size_t* slicePitch)
|
||||
{
|
||||
@@ -696,7 +697,7 @@ Device::allocMapTarget(
|
||||
}
|
||||
|
||||
// Pass request over to memory
|
||||
return memory->allocMapTarget(origin, region, rowPitch, slicePitch);
|
||||
return memory->allocMapTarget(origin, region, mapFlags, rowPitch, slicePitch);
|
||||
}
|
||||
|
||||
bool
|
||||
|
||||
@@ -191,6 +191,7 @@ public:
|
||||
amd::Memory& mem, //!< Abstraction layer memory object
|
||||
const amd::Coord3D& origin, //!< The map location in memory
|
||||
const amd::Coord3D& region, //!< The map region in memory
|
||||
uint mapFlags, //!< Map flags
|
||||
size_t* rowPitch = NULL, //!< Row pitch for the mapped memory
|
||||
size_t* slicePitch = NULL //!< Slice for the mapped memory
|
||||
) {
|
||||
@@ -323,6 +324,7 @@ public:
|
||||
amd::Memory &mem, //!< Abstraction layer memory object
|
||||
const amd::Coord3D &origin, //!< The map location in memory
|
||||
const amd::Coord3D &region, //!< The map region in memory
|
||||
uint mapFlags, //!< Map flags
|
||||
size_t *rowPitch = NULL, //!< Row pitch for the mapped memory
|
||||
size_t *slicePitch = NULL //!< Slice for the mapped memory
|
||||
);
|
||||
|
||||
@@ -79,6 +79,7 @@ Memory::freeMapMemory()
|
||||
void *
|
||||
Memory::allocMapTarget(const amd::Coord3D &origin,
|
||||
const amd::Coord3D &region,
|
||||
uint mapFlags,
|
||||
size_t *rowPitch,
|
||||
size_t *slicePitch)
|
||||
{
|
||||
@@ -146,7 +147,7 @@ Memory::cpuMap(
|
||||
{
|
||||
// Create the map target.
|
||||
void * mapTarget =
|
||||
allocMapTarget(amd::Coord3D(0), amd::Coord3D(0), rowPitch, slicePitch);
|
||||
allocMapTarget(amd::Coord3D(0), amd::Coord3D(0), 0, rowPitch, slicePitch);
|
||||
|
||||
// Sync to map target if no direct access.
|
||||
if (!isHostMemDirectAccess()) {
|
||||
@@ -862,6 +863,7 @@ Image::createView(Image &parent)
|
||||
|
||||
void* Image::allocMapTarget(const amd::Coord3D& origin,
|
||||
const amd::Coord3D& region,
|
||||
uint mapFlags,
|
||||
size_t* rowPitch,
|
||||
size_t* slicePitch)
|
||||
{
|
||||
|
||||
@@ -35,6 +35,7 @@ class Memory : public device::Memory {
|
||||
// of an indirect map for a given memory object
|
||||
virtual void *allocMapTarget(const amd::Coord3D &origin,
|
||||
const amd::Coord3D &region,
|
||||
uint mapFlags,
|
||||
size_t *rowPitch,
|
||||
size_t *slicePitch);
|
||||
|
||||
@@ -168,6 +169,7 @@ public:
|
||||
//! of an indirect map for a given memory object
|
||||
virtual void* allocMapTarget(const amd::Coord3D& origin,
|
||||
const amd::Coord3D& region,
|
||||
uint mapFlags,
|
||||
size_t* rowPitch,
|
||||
size_t* slicePitch);
|
||||
|
||||
|
||||
@@ -589,9 +589,8 @@ void VirtualGPU::submitMapMemory(amd::MapMemoryCommand &cmd)
|
||||
|
||||
// Save map write requirement.
|
||||
if (mapFlag & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION)) {
|
||||
devMemory->saveWriteMapInfo(cmd.origin(),
|
||||
cmd.size(),
|
||||
cmd.isEntireMemory());
|
||||
devMemory->saveMapInfo(cmd.origin(), cmd.size(),
|
||||
mapFlag, cmd.isEntireMemory());
|
||||
}
|
||||
|
||||
// Sync to the map target.
|
||||
@@ -686,7 +685,7 @@ void VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand &cmd)
|
||||
}
|
||||
}
|
||||
|
||||
devMemory->clearUnmapWrite();
|
||||
devMemory->clearUnmapFlags();
|
||||
|
||||
cmd.memory().signalWrite(&dev());
|
||||
}
|
||||
|
||||
Referência em uma Nova Issue
Bloquear um usuário