From b0631f7ab9cc2a366ddfdb25c5eabe53469c7907 Mon Sep 17 00:00:00 2001
From: foreman
Date: Wed, 13 Apr 2016 13:27:37 -0400
Subject: [PATCH] P4 to Git Change 1257532 by gandryey@gera-ocl on 2016/04/13
13:18:22
SWDEV-92049 - Forum [2712399]: clEnqueueMapBuffer in parallel
- Handle multiple unmapInfo structures for multiple simultaneous maps of the same buffer
- This change does not affect the image path, since that path requires extra handling
Affected files ...
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_memobj.cpp#79 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_svm.cpp#16 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#194 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#271 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpumemory.cpp#126 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#399 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsamemory.cpp#25 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsamemory.hpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsavirtual.cpp#64 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palmemory.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/command.hpp#82 edit
---
rocclr/runtime/device/device.cpp | 28 +++++++--
rocclr/runtime/device/device.hpp | 76 +++++++++++++++++-------
rocclr/runtime/device/gpu/gpumemory.cpp | 3 -
rocclr/runtime/device/gpu/gpuvirtual.cpp | 61 ++++++++++---------
rocclr/runtime/device/pal/palvirtual.cpp | 56 +++++++++--------
rocclr/runtime/platform/command.hpp | 60 ++++++++++++-------
6 files changed, 178 insertions(+), 106 deletions(-)
diff --git a/rocclr/runtime/device/device.cpp b/rocclr/runtime/device/device.cpp
index 449eb52202..0d96f97977 100644
--- a/rocclr/runtime/device/device.cpp
+++ b/rocclr/runtime/device/device.cpp
@@ -675,22 +675,38 @@ Kernel::openclMangledName(const std::string& name)
+ //! Saves the map parameters under \p mapAddress, so the matching unmap
+ //! can look them up. A repeated map of an already tracked address is only
+ //! logged and leaves the stored entry unchanged.
void
Memory::saveMapInfo(
+ const void* mapAddress,
const amd::Coord3D origin,
const amd::Coord3D region,
uint mapFlags,
bool entire,
amd::Image* baseMip)
{
+ // Map/Unmap must be serialized.
+ amd::ScopedLock lock(owner()->lockMemoryOps());
+
+ // Scratch record: it receives the writes below when the address is
+ // already tracked, so the existing entry is not modified in that case.
+ WriteMapInfo info = {};
+ WriteMapInfo* pInfo = &info;
+ auto it = writeMapInfo_.find(mapAddress);
+ if (it != writeMapInfo_.end()) {
+ LogWarning("Double map of the same region!");
+ }
+ else {
+ // insert() returns the iterator of the new entry, no second lookup needed
+ it = writeMapInfo_.insert(
+ std::pair<const void*, WriteMapInfo>(mapAddress, info)).first;
+ pInfo = &it->second;
+ }
+
if (mapFlags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION)) {
- writeMapInfo_.origin_ = origin;
- writeMapInfo_.region_ = region;
- writeMapInfo_.entire_ = entire;
- flags_ |= UnmapWrite;
+ pInfo->origin_ = origin;
+ pInfo->region_ = region;
+ pInfo->entire_ = entire;
+ pInfo->unmapWrite_ = true;
}
if (mapFlags & CL_MAP_READ) {
- flags_ |= UnmapRead;
+ pInfo->unmapRead_ = true;
}
- writeMapInfo_.baseMip_ = baseMip;
+ pInfo->baseMip_ = baseMip;
}
Program::Program(amd::Device& device)
diff --git a/rocclr/runtime/device/device.hpp b/rocclr/runtime/device/device.hpp
index 71e1bb0493..239882ab32 100644
--- a/rocclr/runtime/device/device.hpp
+++ b/rocclr/runtime/device/device.hpp
@@ -634,8 +634,25 @@ public:
amd::Coord3D origin_; //!< Origin of the map location
amd::Coord3D region_; //!< Mapped region
amd::Image* baseMip_; //!< The base mip level for images
- bool entire_; //!< True if the enitre memory was mapped
- WriteMapInfo(): origin_(0, 0, 0), region_(0, 0, 0), baseMip_(NULL), entire_(false) {}
+ union {
+ struct {
+ uint32_t unmapWrite_: 1; //!< Unmap write operation
+ uint32_t unmapRead_: 1; //!< Unmap read operation
+ uint32_t entire_: 1; //!< Process the entire memory
+ };
+ uint32_t flags_;
+ };
+
+ //! Returns the state of entire map
+ bool isEntire() const { return (entire_) ? true : false; }
+
+ //! Returns the state of map write flag
+ bool isUnmapWrite() const { return (unmapWrite_) ? true : false; }
+
+ //! Returns the state of map read flag
+ bool isUnmapRead() const { return (unmapRead_) ? true : false; }
+
+ WriteMapInfo(): origin_(0, 0, 0), region_(0, 0, 0), baseMip_(NULL), flags_(0) {}
};
//! Constructor (from an amd::Memory object).
@@ -733,23 +750,44 @@ public:
//! @note: It's not a thread safe operation, the app must implement
//! synchronization for the multiple write maps if necessary
void saveMapInfo(
- const amd::Coord3D origin, //!< Origin of the map location
- const amd::Coord3D region, //!< Mapped region
- uint mapFlags, //< Map flags
- bool entire, //!< True if the enitre memory was mapped
- amd::Image* baseMip = NULL //!< The base mip level for map
+ const void* mapAddress, //!< Map CPU address, used as the lookup key for the saved info
+ const amd::Coord3D origin, //!< Origin of the map location
+ const amd::Coord3D region, //!< Mapped region
+ uint mapFlags, //!< Map flags
+ bool entire, //!< True if the entire memory was mapped
+ amd::Image* baseMip = nullptr //!< The base mip level for map
);
- const WriteMapInfo* writeMapInfo() const { return &writeMapInfo_; }
+ const WriteMapInfo* writeMapInfo(const void* mapAddress) const
+ {
+ // Unmap must be serialized.
+ amd::ScopedLock lock(owner()->lockMemoryOps());
+
+ auto it = writeMapInfo_.find(mapAddress);
+ if (it == writeMapInfo_.end()) {
+ if (writeMapInfo_.size() == 0) {
+ assert(false && "Unmap() call without map!");
+ return nullptr; // no map info is saved at all
+ }
+ LogWarning("Unknown unmap signature!");
+ // Unknown address: fall back to the first saved map info
+ it = writeMapInfo_.begin();
+ }
+ return &it->second; // points into writeMapInfo_; clearUnmapInfo() erases entries
+ }
//! Clear memory object as mapped read only
- void clearUnmapFlags() { flags_ &= ~(UnmapWrite | UnmapRead); }
-
- //! Returns state of map write flag
- bool isUnmapWrite() const { return (flags_ & UnmapWrite) ? true : false; }
-
- //! Returns state of map read flag
- bool isUnmapRead() const { return (flags_ & UnmapRead) ? true : false; }
+ //! Removes the map info saved for \p mapAddress. If the address is not
+ //! tracked, the first saved entry is removed instead; an empty container
+ //! is left untouched.
+ void clearUnmapInfo(const void* mapAddress)
+ {
+ // Unmap must be serialized.
+ amd::ScopedLock lock(owner()->lockMemoryOps());
+ auto it = writeMapInfo_.find(mapAddress);
+ if (it == writeMapInfo_.end()) {
+ // Unknown address: fall back to the first map info
+ it = writeMapInfo_.begin();
+ }
+ // begin() equals end() for an empty map and erase(end()) is undefined
+ // behavior, so erase only when there is an entry to remove
+ if (it != writeMapInfo_.end()) {
+ writeMapInfo_.erase(it);
+ }
+ }
//! Returns state of memory direct access flag
bool isHostMemDirectAccess() const
@@ -764,10 +802,8 @@ protected:
HostMemoryDirectAccess = 0x00000001, //!< GPU has direct access to the host memory
MapResourceAlloced = 0x00000002, //!< Map resource was allocated
PinnedMemoryAlloced = 0x00000004, //!< An extra pinned resource was allocated
- UnmapWrite = 0x00000008, //!< Memory was mapped for write
- SubMemoryObject = 0x00000010, //!< Memory is sub-memory
- HostMemoryRegistered = 0x00000020, //!< Host memory was registered
- UnmapRead = 0x00000040, //!< Memory was mapped for read
+ SubMemoryObject = 0x00000008, //!< Memory is sub-memory
+ HostMemoryRegistered = 0x00000010, //!< Host memory was registered
};
uint flags_; //!< Memory object flags
@@ -781,7 +817,7 @@ protected:
//! can use a remote resource and DMA, avoiding the additional CPU memcpy.
amd::Memory* mapMemory_; //!< Memory used as map target buffer
volatile size_t indirectMapCount_; //!< Number of maps
- WriteMapInfo writeMapInfo_; //!< Saved write map info for partial unmap
+ std::map<const void*, WriteMapInfo> writeMapInfo_; //!< Saved write map info for partial unmap, keyed by map address
//! Increment map count
void incIndMapCount() { ++indirectMapCount_; }
diff --git a/rocclr/runtime/device/gpu/gpumemory.cpp b/rocclr/runtime/device/gpu/gpumemory.cpp
index f965fa7155..807b26d6d4 100644
--- a/rocclr/runtime/device/gpu/gpumemory.cpp
+++ b/rocclr/runtime/device/gpu/gpumemory.cpp
@@ -945,9 +945,6 @@ Memory::allocMapTarget(
}
}
mapAddress = mapMemory()->data();
-
- // Use start of the indirect buffer
- offset = 0;
}
return mapAddress + offset;
diff --git a/rocclr/runtime/device/gpu/gpuvirtual.cpp b/rocclr/runtime/device/gpu/gpuvirtual.cpp
index 30c268429c..19ba1efda9 100644
--- a/rocclr/runtime/device/gpu/gpuvirtual.cpp
+++ b/rocclr/runtime/device/gpu/gpuvirtual.cpp
@@ -1084,7 +1084,7 @@ VirtualGPU::submitMapMemory(amd::MapMemoryCommand& vcmd)
gpu::Memory* memory = dev().getGpuMemory(&vcmd.memory());
// Save map info for unmap operation
- memory->saveMapInfo(vcmd.origin(), vcmd.size(),
+ memory->saveMapInfo(vcmd.mapPtr(), vcmd.origin(), vcmd.size(),
vcmd.mapFlags(), vcmd.isEntireMemory());
// If we have host memory, use it
@@ -1111,7 +1111,7 @@ VirtualGPU::submitMapMemory(amd::MapMemoryCommand& vcmd)
amd::Coord3D dstOrigin(0, 0, 0);
if (memory->cal()->buffer_) {
if (!blitMgr().copyBuffer(*memory,
- *memory->mapMemory(), vcmd.origin(), dstOrigin,
+ *memory->mapMemory(), vcmd.origin(), vcmd.origin(),
vcmd.size(), vcmd.isEntireMemory())) {
LogError("submitMapMemory() - copy failed");
vcmd.setStatus(CL_MAP_FAILURE);
@@ -1151,7 +1151,7 @@ VirtualGPU::submitMapMemory(amd::MapMemoryCommand& vcmd)
amd::Image* amdImage = vcmd.memory().parent()->asImage();
if ((amdImage != NULL) && (amdImage->getMipLevels() > 1)) {
// Save map write info in the parent object
- dev().getGpuMemory(amdImage)->saveMapInfo(
+ dev().getGpuMemory(amdImage)->saveMapInfo(vcmd.mapPtr(),
vcmd.origin(), vcmd.size(),
vcmd.mapFlags(), vcmd.isEntireMemory(),
vcmd.memory().asImage());
@@ -1183,22 +1183,24 @@ VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& vcmd)
gpu::Memory* memory = dev().getGpuMemory(&vcmd.memory());
amd::Memory* owner = memory->owner();
bool unmapMip = false;
+ const device::Memory::WriteMapInfo* writeMapInfo =
+ memory->writeMapInfo(vcmd.mapPtr());
// Check if image is a mipmap and assign a saved view
amd::Image* amdImage = owner->asImage();
if ((amdImage != NULL) && (amdImage->getMipLevels() > 1) &&
- (memory->writeMapInfo()->baseMip_ != NULL)) {
+ (writeMapInfo->baseMip_ != NULL)) {
+ // Read the saved view first: clearUnmapInfo() below erases the entry
+ // that writeMapInfo points to
+ amd::Image* baseMip = writeMapInfo->baseMip_;
// Clear unmap flags from the parent image
- memory->clearUnmapFlags();
+ memory->clearUnmapInfo(vcmd.mapPtr());
// Assign mip level view
- amdImage = memory->writeMapInfo()->baseMip_;
+ amdImage = baseMip;
memory = dev().getGpuMemory(amdImage);
+ // Continue with the map info saved on the mip level view; the parent's
+ // entry no longer exists (assumes the view was mapped with the same
+ // map pointer - confirm against submitMapMemory callers)
+ writeMapInfo = memory->writeMapInfo(vcmd.mapPtr());
unmapMip = true;
}
// We used host memory
if ((owner->getHostMem() != NULL) && memory->isDirectMap()) {
- if (memory->isUnmapWrite() && !owner->usesSvmPointer()) {
+ if (writeMapInfo->isUnmapWrite() && !owner->usesSvmPointer()) {
// Target is the backing store, so sync
owner->signalWrite(NULL);
memory->syncCacheFromHost(*this);
@@ -1212,17 +1214,17 @@ VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& vcmd)
memory->unmap(this);
}
else if (memory->mapMemory() != NULL) {
- if (memory->isUnmapWrite()) {
+ if (writeMapInfo->isUnmapWrite()) {
amd::Coord3D srcOrigin(0, 0, 0);
// Target is a remote resource, so copy
assert(memory->mapMemory() != NULL);
if (memory->cal()->buffer_) {
if (!blitMgr().copyBuffer(
*memory->mapMemory(), *memory,
- srcOrigin,
- memory->writeMapInfo()->origin_,
- memory->writeMapInfo()->region_,
- memory->writeMapInfo()->entire_)) {
+ writeMapInfo->origin_,
+ writeMapInfo->origin_,
+ writeMapInfo->region_,
+ writeMapInfo->isEntire())) {
LogError("submitUnmapMemory() - copy failed");
vcmd.setStatus(CL_OUT_OF_RESOURCES);
}
@@ -1230,8 +1232,8 @@ VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& vcmd)
else if ((vcmd.memory().getType() == CL_MEM_OBJECT_IMAGE1D_BUFFER)) {
amd::Memory* bufferFromImage = NULL;
Memory* memoryBuf = memory;
- amd::Coord3D origin(memory->writeMapInfo()->origin_[0]);
- amd::Coord3D size(memory->writeMapInfo()->region_[0]);
+ amd::Coord3D origin(writeMapInfo->origin_[0]);
+ amd::Coord3D size(writeMapInfo->region_[0]);
size_t elemSize =
vcmd.memory().asImage()->getImageFormat().getElementSize();
origin.c[0] *= elemSize;
@@ -1248,7 +1250,7 @@ VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& vcmd)
if (!blitMgr().copyBuffer(
*memory->mapMemory(), *memoryBuf,
srcOrigin, origin, size,
- memory->writeMapInfo()->entire_)) {
+ writeMapInfo->isEntire())) {
LogError("submitUnmapMemory() - copy failed");
vcmd.setStatus(CL_OUT_OF_RESOURCES);
}
@@ -1260,9 +1262,9 @@ VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& vcmd)
if (!blitMgr().copyBufferToImage(
*memory->mapMemory(), *memory,
srcOrigin,
- memory->writeMapInfo()->origin_,
- memory->writeMapInfo()->region_,
- memory->writeMapInfo()->entire_)) {
+ writeMapInfo->origin_,
+ writeMapInfo->region_,
+ writeMapInfo->isEntire())) {
LogError("submitUnmapMemory() - copy failed");
vcmd.setStatus(CL_OUT_OF_RESOURCES);
}
@@ -1275,7 +1277,7 @@ VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& vcmd)
}
// Clear unmap flags
- memory->clearUnmapFlags();
+ memory->clearUnmapInfo(vcmd.mapPtr());
// Release a view for a mipmap map
if (unmapMip) {
@@ -1385,16 +1387,15 @@ VirtualGPU::submitSvmMapMemory(amd::SvmMapMemoryCommand& vcmd)
if (!dev().isFineGrainedSystem()) {
// Make sure we have memory for the command execution
gpu::Memory* memory = dev().getGpuMemory(vcmd.getSvmMem());
-
- memory->saveMapInfo(vcmd.origin(), vcmd.size(),
+ memory->saveMapInfo(vcmd.svmPtr(), vcmd.origin(), vcmd.size(),
vcmd.mapFlags(), vcmd.isEntireMemory());
if (memory->mapMemory() != NULL) {
if (vcmd.mapFlags() & (CL_MAP_READ | CL_MAP_WRITE)) {
- amd::Coord3D dstOrigin(0, 0, 0);
assert(memory->cal()->buffer_ && "SVM memory can't be an image");
if (!blitMgr().copyBuffer(*memory, *memory->mapMemory(),
- vcmd.origin(), dstOrigin, vcmd.size(), vcmd.isEntireMemory())) {
+ vcmd.origin(), vcmd.origin(), vcmd.size(),
+ vcmd.isEntireMemory())) {
LogError("submitSVMMapMemory() - copy failed");
vcmd.setStatus(CL_MAP_FAILURE);
}
@@ -1417,21 +1418,23 @@ VirtualGPU::submitSvmUnmapMemory(amd::SvmUnmapMemoryCommand& vcmd)
//no op for FGS supported device
if (!dev().isFineGrainedSystem()) {
-
gpu::Memory* memory = dev().getGpuMemory(vcmd.getSvmMem());
+ const device::Memory::WriteMapInfo* writeMapInfo =
+ memory->writeMapInfo(vcmd.svmPtr());
+
if (memory->mapMemory() != NULL) {
- if (memory->isUnmapWrite()) {
- amd::Coord3D srcOrigin(0, 0, 0);
+ if (writeMapInfo->isUnmapWrite()) {
// Target is a remote resource, so copy
assert(memory->cal()->buffer_ && "SVM memory can't be an image");
- if (!blitMgr().copyBuffer(*memory->mapMemory(), *memory, srcOrigin,
- memory->writeMapInfo()->origin_, memory->writeMapInfo()->region_,
- memory->writeMapInfo()->entire_)) {
+ if (!blitMgr().copyBuffer(*memory->mapMemory(), *memory,
+ writeMapInfo->origin_, writeMapInfo->origin_,
+ writeMapInfo->region_, writeMapInfo->isEntire())) {
LogError("submitSvmUnmapMemory() - copy failed");
vcmd.setStatus(CL_OUT_OF_RESOURCES);
}
}
}
+ memory->clearUnmapInfo(vcmd.svmPtr());
}
profilingEnd(vcmd);
diff --git a/rocclr/runtime/device/pal/palvirtual.cpp b/rocclr/runtime/device/pal/palvirtual.cpp
index 1b1c80c93b..3da5c05333 100644
--- a/rocclr/runtime/device/pal/palvirtual.cpp
+++ b/rocclr/runtime/device/pal/palvirtual.cpp
@@ -1327,7 +1327,7 @@ VirtualGPU::submitMapMemory(amd::MapMemoryCommand& vcmd)
pal::Memory* memory = dev().getGpuMemory(&vcmd.memory());
// Save map info for unmap operation
- memory->saveMapInfo(vcmd.origin(), vcmd.size(),
+ memory->saveMapInfo(vcmd.mapPtr(), vcmd.origin(), vcmd.size(),
vcmd.mapFlags(), vcmd.isEntireMemory());
// If we have host memory, use it
@@ -1394,7 +1394,7 @@ VirtualGPU::submitMapMemory(amd::MapMemoryCommand& vcmd)
amd::Image* amdImage = vcmd.memory().parent()->asImage();
if ((amdImage != nullptr) && (amdImage->getMipLevels() > 1)) {
// Save map write info in the parent object
- dev().getGpuMemory(amdImage)->saveMapInfo(
+ dev().getGpuMemory(amdImage)->saveMapInfo(vcmd.mapPtr(),
vcmd.origin(), vcmd.size(),
vcmd.mapFlags(), vcmd.isEntireMemory(),
vcmd.memory().asImage());
@@ -1426,22 +1426,24 @@ VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& vcmd)
pal::Memory* memory = dev().getGpuMemory(&vcmd.memory());
amd::Memory* owner = memory->owner();
bool unmapMip = false;
+ const device::Memory::WriteMapInfo* writeMapInfo =
+ memory->writeMapInfo(vcmd.mapPtr());
// Check if image is a mipmap and assign a saved view
amd::Image* amdImage = owner->asImage();
if ((amdImage != nullptr) && (amdImage->getMipLevels() > 1) &&
- (memory->writeMapInfo()->baseMip_ != nullptr)) {
+ (writeMapInfo->baseMip_ != nullptr)) {
+ // Read the saved view first: clearUnmapInfo() below erases the entry
+ // that writeMapInfo points to
+ amd::Image* baseMip = writeMapInfo->baseMip_;
// Clear unmap flags from the parent image
- memory->clearUnmapFlags();
+ memory->clearUnmapInfo(vcmd.mapPtr());
// Assign mip level view
- amdImage = memory->writeMapInfo()->baseMip_;
+ amdImage = baseMip;
memory = dev().getGpuMemory(amdImage);
+ // Continue with the map info saved on the mip level view; the parent's
+ // entry no longer exists (assumes the view was mapped with the same
+ // map pointer - confirm against submitMapMemory callers)
+ writeMapInfo = memory->writeMapInfo(vcmd.mapPtr());
unmapMip = true;
}
// We used host memory
if ((owner->getHostMem() != nullptr) && memory->isDirectMap()) {
- if (memory->isUnmapWrite() && !owner->usesSvmPointer()) {
+ if (writeMapInfo->isUnmapWrite() && !owner->usesSvmPointer()) {
// Target is the backing store, so sync
owner->signalWrite(nullptr);
memory->syncCacheFromHost(*this);
@@ -1455,17 +1457,17 @@ VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& vcmd)
memory->unmap(this);
}
else if (memory->mapMemory() != nullptr) {
- if (memory->isUnmapWrite()) {
+ if (writeMapInfo->isUnmapWrite()) {
amd::Coord3D srcOrigin(0, 0, 0);
// Target is a remote resource, so copy
assert(memory->mapMemory() != nullptr);
if (memory->desc().buffer_) {
if (!blitMgr().copyBuffer(
*memory->mapMemory(), *memory,
- srcOrigin,
- memory->writeMapInfo()->origin_,
- memory->writeMapInfo()->region_,
- memory->writeMapInfo()->entire_)) {
+ writeMapInfo->origin_,
+ writeMapInfo->origin_,
+ writeMapInfo->region_,
+ writeMapInfo->isEntire())) {
LogError("submitUnmapMemory() - copy failed");
vcmd.setStatus(CL_OUT_OF_RESOURCES);
}
@@ -1473,8 +1475,8 @@ VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& vcmd)
else if ((vcmd.memory().getType() == CL_MEM_OBJECT_IMAGE1D_BUFFER)) {
amd::Memory* bufferFromImage = nullptr;
Memory* memoryBuf = memory;
- amd::Coord3D origin(memory->writeMapInfo()->origin_[0]);
- amd::Coord3D size(memory->writeMapInfo()->region_[0]);
+ amd::Coord3D origin(writeMapInfo->origin_[0]);
+ amd::Coord3D size(writeMapInfo->region_[0]);
size_t elemSize =
vcmd.memory().asImage()->getImageFormat().getElementSize();
origin.c[0] *= elemSize;
@@ -1491,7 +1493,7 @@ VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& vcmd)
if (!blitMgr().copyBuffer(
*memory->mapMemory(), *memoryBuf,
srcOrigin, origin, size,
- memory->writeMapInfo()->entire_)) {
+ writeMapInfo->isEntire())) {
LogError("submitUnmapMemory() - copy failed");
vcmd.setStatus(CL_OUT_OF_RESOURCES);
}
@@ -1503,9 +1505,9 @@ VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& vcmd)
if (!blitMgr().copyBufferToImage(
*memory->mapMemory(), *memory,
srcOrigin,
- memory->writeMapInfo()->origin_,
- memory->writeMapInfo()->region_,
- memory->writeMapInfo()->entire_)) {
+ writeMapInfo->origin_,
+ writeMapInfo->region_,
+ writeMapInfo->isEntire())) {
LogError("submitUnmapMemory() - copy failed");
vcmd.setStatus(CL_OUT_OF_RESOURCES);
}
@@ -1518,7 +1520,7 @@ VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& vcmd)
}
// Clear unmap flags
- memory->clearUnmapFlags();
+ memory->clearUnmapInfo(vcmd.mapPtr());
// Release a view for a mipmap map
if (unmapMip) {
@@ -1629,15 +1631,14 @@ VirtualGPU::submitSvmMapMemory(amd::SvmMapMemoryCommand& vcmd)
// Make sure we have memory for the command execution
pal::Memory* memory = dev().getGpuMemory(vcmd.getSvmMem());
- memory->saveMapInfo(vcmd.origin(), vcmd.size(),
+ memory->saveMapInfo(vcmd.svmPtr(), vcmd.origin(), vcmd.size(),
vcmd.mapFlags(), vcmd.isEntireMemory());
if (memory->mapMemory() != nullptr) {
if (vcmd.mapFlags() & (CL_MAP_READ | CL_MAP_WRITE)) {
- amd::Coord3D dstOrigin(0, 0, 0);
assert(memory->desc().buffer_ && "SVM memory can't be an image");
if (!blitMgr().copyBuffer(*memory, *memory->mapMemory(),
- vcmd.origin(), dstOrigin, vcmd.size(), vcmd.isEntireMemory())) {
+ vcmd.origin(), vcmd.origin(), vcmd.size(), vcmd.isEntireMemory())) {
LogError("submitSVMMapMemory() - copy failed");
vcmd.setStatus(CL_MAP_FAILURE);
}
@@ -1660,21 +1661,24 @@ VirtualGPU::submitSvmUnmapMemory(amd::SvmUnmapMemoryCommand& vcmd)
//no op for FGS supported device
if (!dev().isFineGrainedSystem()) {
-
pal::Memory* memory = dev().getGpuMemory(vcmd.getSvmMem());
+ const device::Memory::WriteMapInfo* writeMapInfo =
+ memory->writeMapInfo(vcmd.svmPtr());
+
if (memory->mapMemory() != nullptr) {
- if (memory->isUnmapWrite()) {
+ if (writeMapInfo->isUnmapWrite()) {
amd::Coord3D srcOrigin(0, 0, 0);
// Target is a remote resource, so copy
assert(memory->desc().buffer_ && "SVM memory can't be an image");
- if (!blitMgr().copyBuffer(*memory->mapMemory(), *memory, srcOrigin,
- memory->writeMapInfo()->origin_, memory->writeMapInfo()->region_,
- memory->writeMapInfo()->entire_)) {
+ if (!blitMgr().copyBuffer(*memory->mapMemory(), *memory,
+ writeMapInfo->origin_, writeMapInfo->origin_,
+ writeMapInfo->region_, writeMapInfo->isEntire())) {
LogError("submitSvmUnmapMemory() - copy failed");
vcmd.setStatus(CL_OUT_OF_RESOURCES);
}
}
}
+ memory->clearUnmapInfo(vcmd.svmPtr());
}
profilingEnd(vcmd);
diff --git a/rocclr/runtime/platform/command.hpp b/rocclr/runtime/platform/command.hpp
index 7d03925990..94d35ab071 100644
--- a/rocclr/runtime/platform/command.hpp
+++ b/rocclr/runtime/platform/command.hpp
@@ -709,10 +709,11 @@ public:
class MapMemoryCommand: public OneMemoryArgCommand
{
private:
- cl_map_flags mapFlags_; //!< Flags controlling the map.
- bool blocking_; //!< True for blocking maps
- Coord3D origin_; //!< Origin of the region to map.
- Coord3D size_; //!< Size of the region to map.
+ cl_map_flags mapFlags_; //!< Flags controlling the map.
+ bool blocking_; //!< True for blocking maps
+ Coord3D origin_; //!< Origin of the region to map.
+ Coord3D size_; //!< Size of the region to map.
+ const void* mapPtr_; //!< Host-space pointer that the object is currently mapped at
public:
//! Construct a new MapMemoryCommand
@@ -723,11 +724,12 @@ public:
Memory& memory, cl_map_flags mapFlags,
bool blocking,
Coord3D origin, Coord3D size,
- size_t* imgRowPitch = NULL,
- size_t* imgSlicePitch = NULL) :
+ size_t* imgRowPitch = nullptr,
+ size_t* imgSlicePitch = nullptr,
+ void* mapPtr = nullptr) :
OneMemoryArgCommand(queue, cmdType, eventWaitList, memory),
mapFlags_(mapFlags), blocking_(blocking),
- origin_(origin), size_(size)
+ origin_(origin), size_(size), mapPtr_(mapPtr)
{
// Sanity checks
assert(size.c[0] > 0 && "invalid");
@@ -749,6 +751,9 @@ public:
bool blocking() const { return blocking_; }
//! Returns true if the entire memory object is mapped
bool isEntireMemory() const;
+ //! Read the map pointer
+ const void* mapPtr() const { return mapPtr_; }
+
};
@@ -1398,10 +1403,11 @@ public:
class SvmMapMemoryCommand : public Command
{
private:
- Memory* svmMem_; //!< the pointer to the amd::Memory object corresponding the svm pointer mapped
- Coord3D size_; //!< the map size
- Coord3D origin_; //!< the origin of the mapped svm pointer shift from the beginning of svm space allocated
- cl_map_flags flags_; //!< map flags
+ Memory* svmMem_; //!< the pointer to the amd::Memory object corresponding the svm pointer mapped
+ Coord3D size_; //!< the map size
+ Coord3D origin_; //!< the origin of the mapped svm pointer shift from the beginning of svm space allocated
+ cl_map_flags flags_; //!< map flags
+ void* svmPtr_; //!< SVM pointer
public:
SvmMapMemoryCommand(
@@ -1410,12 +1416,14 @@ public:
Memory* svmMem,
const size_t size,
const size_t offset,
- cl_map_flags flags) :
- Command(queue, CL_COMMAND_SVM_MAP, eventWaitList),
- svmMem_(svmMem),
- size_(size),
- origin_(offset),
- flags_(flags)
+ cl_map_flags flags,
+ void* svmPtr)
+ : Command(queue, CL_COMMAND_SVM_MAP, eventWaitList)
+ , svmMem_(svmMem)
+ , size_(size)
+ , origin_(offset)
+ , flags_(flags)
+ , svmPtr_(svmPtr)
{
}
@@ -1432,6 +1440,8 @@ public:
Coord3D origin() const {return origin_;}
+ void* svmPtr() const { return svmPtr_; }
+
bool isEntireMemory() const;
};
@@ -1441,14 +1451,18 @@ public:
class SvmUnmapMemoryCommand : public Command
{
private:
- Memory* svmMem_; //!< the pointer to the amd::Memory object corresponding the svm pointer mapped
+ Memory* svmMem_; //!< the pointer to the amd::Memory object corresponding the svm pointer mapped
+ void* svmPtr_; //!< SVM pointer
+
public:
SvmUnmapMemoryCommand(
HostQueue& queue,
const EventWaitList& eventWaitList,
- Memory* svmMem) :
- Command(queue, CL_COMMAND_SVM_UNMAP, eventWaitList),
- svmMem_(svmMem)
+ Memory* svmMem,
+ void* svmPtr)
+ : Command(queue, CL_COMMAND_SVM_UNMAP, eventWaitList)
+ , svmMem_(svmMem)
+ , svmPtr_(svmPtr)
{}
virtual void submit(device::VirtualDevice& device)
@@ -1456,7 +1470,9 @@ public:
device.submitSvmUnmapMemory(*this);
}
- Memory* getSvmMem() const {return svmMem_;}
+ Memory* getSvmMem() const { return svmMem_; }
+
+ void* svmPtr() const { return svmPtr_; }
};
/*! \brief A generic transfer memory from/to file command.