diff --git a/rocclr/runtime/device/device.cpp b/rocclr/runtime/device/device.cpp index 449eb52202..0d96f97977 100644 --- a/rocclr/runtime/device/device.cpp +++ b/rocclr/runtime/device/device.cpp @@ -675,22 +675,38 @@ Kernel::openclMangledName(const std::string& name) void Memory::saveMapInfo( + const void* mapAddress, const amd::Coord3D origin, const amd::Coord3D region, uint mapFlags, bool entire, amd::Image* baseMip) { + // Map/Unmap must be serialized. + amd::ScopedLock lock(owner()->lockMemoryOps()); + + WriteMapInfo info = {}; + WriteMapInfo* pInfo = &info; + auto it = writeMapInfo_.find(mapAddress); + if (it != writeMapInfo_.end()) { + LogWarning("Double map of the same region!"); + } + else { + writeMapInfo_.insert(std::pair(mapAddress, info)); + it = writeMapInfo_.find(mapAddress); + pInfo = &it->second; + } + if (mapFlags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION)) { - writeMapInfo_.origin_ = origin; - writeMapInfo_.region_ = region; - writeMapInfo_.entire_ = entire; - flags_ |= UnmapWrite; + pInfo->origin_ = origin; + pInfo->region_ = region; + pInfo->entire_ = entire; + pInfo->unmapWrite_ = true; } if (mapFlags & CL_MAP_READ) { - flags_ |= UnmapRead; + pInfo->unmapRead_ = true; } - writeMapInfo_.baseMip_ = baseMip; + pInfo->baseMip_ = baseMip; } Program::Program(amd::Device& device) diff --git a/rocclr/runtime/device/device.hpp b/rocclr/runtime/device/device.hpp index 71e1bb0493..239882ab32 100644 --- a/rocclr/runtime/device/device.hpp +++ b/rocclr/runtime/device/device.hpp @@ -634,8 +634,25 @@ public: amd::Coord3D origin_; //!< Origin of the map location amd::Coord3D region_; //!< Mapped region amd::Image* baseMip_; //!< The base mip level for images - bool entire_; //!< True if the enitre memory was mapped - WriteMapInfo(): origin_(0, 0, 0), region_(0, 0, 0), baseMip_(NULL), entire_(false) {} + union { + struct { + uint32_t unmapWrite_: 1; //!< Unmap write operation + uint32_t unmapRead_: 1; //!< Unmap read operation + uint32_t entire_: 1; //!< 
Process the entire memory + }; + uint32_t flags_; + }; + + //! Returns the state of entire map + bool isEntire() const { return (entire_) ? true : false; } + + //! Returns the state of map write flag + bool isUnmapWrite() const { return (unmapWrite_) ? true : false; } + + //! Returns the state of map read flag + bool isUnmapRead() const { return (unmapRead_) ? true : false; } + + WriteMapInfo(): origin_(0, 0, 0), region_(0, 0, 0), baseMip_(NULL), flags_(0) {} }; //! Constructor (from an amd::Memory object). @@ -733,23 +750,44 @@ public: //! @note: It's not a thread safe operation, the app must implement //! synchronization for the multiple write maps if necessary void saveMapInfo( - const amd::Coord3D origin, //!< Origin of the map location - const amd::Coord3D region, //!< Mapped region - uint mapFlags, //< Map flags - bool entire, //!< True if the enitre memory was mapped - amd::Image* baseMip = NULL //!< The base mip level for map + const void* mapAddress, //!< Map cpu address + const amd::Coord3D origin, //!< Origin of the map location + const amd::Coord3D region, //!< Mapped region + uint mapFlags, //!< Map flags + bool entire, //!< True if the entire memory was mapped + amd::Image* baseMip = nullptr //!< The base mip level for map ); - const WriteMapInfo* writeMapInfo() const { return &writeMapInfo_; } + const WriteMapInfo* writeMapInfo(const void* mapAddress) const + { + // Unmap must be serialized. + amd::ScopedLock lock(owner()->lockMemoryOps()); + + auto it = writeMapInfo_.find(mapAddress); + if (it == writeMapInfo_.end()) { + if (writeMapInfo_.size() == 0) { + assert(false && "Unmap() call without map!"); + return nullptr; + } + LogWarning("Unknown unmap signature!"); + // Get the first map info + it = writeMapInfo_.begin(); + } + return &it->second; + } //! Clear memory object as mapped read only - void clearUnmapFlags() { flags_ &= ~(UnmapWrite | UnmapRead); } - - //! 
Returns state of map write flag - bool isUnmapWrite() const { return (flags_ & UnmapWrite) ? true : false; } - - //! Returns state of map read flag - bool isUnmapRead() const { return (flags_ & UnmapRead) ? true : false; } + void clearUnmapInfo(const void* mapAddress) + { + // Unmap must be serialized. + amd::ScopedLock lock(owner()->lockMemoryOps()); + auto it = writeMapInfo_.find(mapAddress); + if (it == writeMapInfo_.end()) { + // Get the first map info + it = writeMapInfo_.begin(); + } + writeMapInfo_.erase(it); + } //! Returns state of memory direct access flag bool isHostMemDirectAccess() const @@ -764,10 +802,8 @@ protected: HostMemoryDirectAccess = 0x00000001, //!< GPU has direct access to the host memory MapResourceAlloced = 0x00000002, //!< Map resource was allocated PinnedMemoryAlloced = 0x00000004, //!< An extra pinned resource was allocated - UnmapWrite = 0x00000008, //!< Memory was mapped for write - SubMemoryObject = 0x00000010, //!< Memory is sub-memory - HostMemoryRegistered = 0x00000020, //!< Host memory was registered - UnmapRead = 0x00000040, //!< Memory was mapped for read + SubMemoryObject = 0x00000008, //!< Memory is sub-memory + HostMemoryRegistered = 0x00000010, //!< Host memory was registered }; uint flags_; //!< Memory object flags @@ -781,7 +817,7 @@ protected: //! can use a remote resource and DMA, avoiding the additional CPU memcpy. amd::Memory* mapMemory_; //!< Memory used as map target buffer volatile size_t indirectMapCount_; //!< Number of maps - WriteMapInfo writeMapInfo_; //!< Saved write map info for partial unmap + std::map writeMapInfo_; //!< Saved write map info for partial unmap //! 
Increment map count void incIndMapCount() { ++indirectMapCount_; } diff --git a/rocclr/runtime/device/gpu/gpumemory.cpp b/rocclr/runtime/device/gpu/gpumemory.cpp index f965fa7155..807b26d6d4 100644 --- a/rocclr/runtime/device/gpu/gpumemory.cpp +++ b/rocclr/runtime/device/gpu/gpumemory.cpp @@ -945,9 +945,6 @@ Memory::allocMapTarget( } } mapAddress = mapMemory()->data(); - - // Use start of the indirect buffer - offset = 0; } return mapAddress + offset; diff --git a/rocclr/runtime/device/gpu/gpuvirtual.cpp b/rocclr/runtime/device/gpu/gpuvirtual.cpp index 30c268429c..19ba1efda9 100644 --- a/rocclr/runtime/device/gpu/gpuvirtual.cpp +++ b/rocclr/runtime/device/gpu/gpuvirtual.cpp @@ -1084,7 +1084,7 @@ VirtualGPU::submitMapMemory(amd::MapMemoryCommand& vcmd) gpu::Memory* memory = dev().getGpuMemory(&vcmd.memory()); // Save map info for unmap operation - memory->saveMapInfo(vcmd.origin(), vcmd.size(), + memory->saveMapInfo(vcmd.mapPtr(), vcmd.origin(), vcmd.size(), vcmd.mapFlags(), vcmd.isEntireMemory()); // If we have host memory, use it @@ -1111,7 +1111,7 @@ VirtualGPU::submitMapMemory(amd::MapMemoryCommand& vcmd) amd::Coord3D dstOrigin(0, 0, 0); if (memory->cal()->buffer_) { if (!blitMgr().copyBuffer(*memory, - *memory->mapMemory(), vcmd.origin(), dstOrigin, + *memory->mapMemory(), vcmd.origin(), vcmd.origin(), vcmd.size(), vcmd.isEntireMemory())) { LogError("submitMapMemory() - copy failed"); vcmd.setStatus(CL_MAP_FAILURE); @@ -1151,7 +1151,7 @@ VirtualGPU::submitMapMemory(amd::MapMemoryCommand& vcmd) amd::Image* amdImage = vcmd.memory().parent()->asImage(); if ((amdImage != NULL) && (amdImage->getMipLevels() > 1)) { // Save map write info in the parent object - dev().getGpuMemory(amdImage)->saveMapInfo( + dev().getGpuMemory(amdImage)->saveMapInfo(vcmd.mapPtr(), vcmd.origin(), vcmd.size(), vcmd.mapFlags(), vcmd.isEntireMemory(), vcmd.memory().asImage()); @@ -1183,22 +1183,24 @@ VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& vcmd) gpu::Memory* memory = 
dev().getGpuMemory(&vcmd.memory()); amd::Memory* owner = memory->owner(); bool unmapMip = false; + const device::Memory::WriteMapInfo* writeMapInfo = + memory->writeMapInfo(vcmd.mapPtr()); // Check if image is a mipmap and assign a saved view amd::Image* amdImage = owner->asImage(); if ((amdImage != NULL) && (amdImage->getMipLevels() > 1) && - (memory->writeMapInfo()->baseMip_ != NULL)) { + (writeMapInfo->baseMip_ != NULL)) { // Clear unmap flags from the parent image - memory->clearUnmapFlags(); + memory->clearUnmapInfo(vcmd.mapPtr()); // Assign mip level view - amdImage = memory->writeMapInfo()->baseMip_; + amdImage = writeMapInfo->baseMip_; memory = dev().getGpuMemory(amdImage); unmapMip = true; } // We used host memory if ((owner->getHostMem() != NULL) && memory->isDirectMap()) { - if (memory->isUnmapWrite() && !owner->usesSvmPointer()) { + if (writeMapInfo->isUnmapWrite() && !owner->usesSvmPointer()) { // Target is the backing store, so sync owner->signalWrite(NULL); memory->syncCacheFromHost(*this); @@ -1212,17 +1214,17 @@ VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& vcmd) memory->unmap(this); } else if (memory->mapMemory() != NULL) { - if (memory->isUnmapWrite()) { + if (writeMapInfo->isUnmapWrite()) { amd::Coord3D srcOrigin(0, 0, 0); // Target is a remote resource, so copy assert(memory->mapMemory() != NULL); if (memory->cal()->buffer_) { if (!blitMgr().copyBuffer( *memory->mapMemory(), *memory, - srcOrigin, - memory->writeMapInfo()->origin_, - memory->writeMapInfo()->region_, - memory->writeMapInfo()->entire_)) { + writeMapInfo->origin_, + writeMapInfo->origin_, + writeMapInfo->region_, + writeMapInfo->isEntire())) { LogError("submitUnmapMemory() - copy failed"); vcmd.setStatus(CL_OUT_OF_RESOURCES); } @@ -1230,8 +1232,8 @@ VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& vcmd) else if ((vcmd.memory().getType() == CL_MEM_OBJECT_IMAGE1D_BUFFER)) { amd::Memory* bufferFromImage = NULL; Memory* memoryBuf = memory; - amd::Coord3D 
origin(memory->writeMapInfo()->origin_[0]); - amd::Coord3D size(memory->writeMapInfo()->region_[0]); + amd::Coord3D origin(writeMapInfo->origin_[0]); + amd::Coord3D size(writeMapInfo->region_[0]); size_t elemSize = vcmd.memory().asImage()->getImageFormat().getElementSize(); origin.c[0] *= elemSize; @@ -1248,7 +1250,7 @@ VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& vcmd) if (!blitMgr().copyBuffer( *memory->mapMemory(), *memoryBuf, srcOrigin, origin, size, - memory->writeMapInfo()->entire_)) { + writeMapInfo->isEntire())) { LogError("submitUnmapMemory() - copy failed"); vcmd.setStatus(CL_OUT_OF_RESOURCES); } @@ -1260,9 +1262,9 @@ VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& vcmd) if (!blitMgr().copyBufferToImage( *memory->mapMemory(), *memory, srcOrigin, - memory->writeMapInfo()->origin_, - memory->writeMapInfo()->region_, - memory->writeMapInfo()->entire_)) { + writeMapInfo->origin_, + writeMapInfo->region_, + writeMapInfo->isEntire())) { LogError("submitUnmapMemory() - copy failed"); vcmd.setStatus(CL_OUT_OF_RESOURCES); } @@ -1275,7 +1277,7 @@ VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& vcmd) } // Clear unmap flags - memory->clearUnmapFlags(); + memory->clearUnmapInfo(vcmd.mapPtr()); // Release a view for a mipmap map if (unmapMip) { @@ -1385,16 +1387,15 @@ VirtualGPU::submitSvmMapMemory(amd::SvmMapMemoryCommand& vcmd) if (!dev().isFineGrainedSystem()) { // Make sure we have memory for the command execution gpu::Memory* memory = dev().getGpuMemory(vcmd.getSvmMem()); - - memory->saveMapInfo(vcmd.origin(), vcmd.size(), + memory->saveMapInfo(vcmd.svmPtr(), vcmd.origin(), vcmd.size(), vcmd.mapFlags(), vcmd.isEntireMemory()); if (memory->mapMemory() != NULL) { if (vcmd.mapFlags() & (CL_MAP_READ | CL_MAP_WRITE)) { - amd::Coord3D dstOrigin(0, 0, 0); assert(memory->cal()->buffer_ && "SVM memory can't be an image"); if (!blitMgr().copyBuffer(*memory, *memory->mapMemory(), - vcmd.origin(), dstOrigin, vcmd.size(), vcmd.isEntireMemory())) { + 
vcmd.origin(), vcmd.origin(), vcmd.size(), + vcmd.isEntireMemory())) { LogError("submitSVMMapMemory() - copy failed"); vcmd.setStatus(CL_MAP_FAILURE); } @@ -1417,21 +1418,23 @@ VirtualGPU::submitSvmUnmapMemory(amd::SvmUnmapMemoryCommand& vcmd) //no op for FGS supported device if (!dev().isFineGrainedSystem()) { - gpu::Memory* memory = dev().getGpuMemory(vcmd.getSvmMem()); + const device::Memory::WriteMapInfo* writeMapInfo = + memory->writeMapInfo(vcmd.svmPtr()); + if (memory->mapMemory() != NULL) { - if (memory->isUnmapWrite()) { - amd::Coord3D srcOrigin(0, 0, 0); + if (writeMapInfo->isUnmapWrite()) { // Target is a remote resource, so copy assert(memory->cal()->buffer_ && "SVM memory can't be an image"); - if (!blitMgr().copyBuffer(*memory->mapMemory(), *memory, srcOrigin, - memory->writeMapInfo()->origin_, memory->writeMapInfo()->region_, - memory->writeMapInfo()->entire_)) { + if (!blitMgr().copyBuffer(*memory->mapMemory(), *memory, + writeMapInfo->origin_, writeMapInfo->origin_, + writeMapInfo->region_, writeMapInfo->isEntire())) { LogError("submitSvmUnmapMemory() - copy failed"); vcmd.setStatus(CL_OUT_OF_RESOURCES); } } } + memory->clearUnmapInfo(vcmd.svmPtr()); } profilingEnd(vcmd); diff --git a/rocclr/runtime/device/pal/palvirtual.cpp b/rocclr/runtime/device/pal/palvirtual.cpp index 1b1c80c93b..3da5c05333 100644 --- a/rocclr/runtime/device/pal/palvirtual.cpp +++ b/rocclr/runtime/device/pal/palvirtual.cpp @@ -1327,7 +1327,7 @@ VirtualGPU::submitMapMemory(amd::MapMemoryCommand& vcmd) pal::Memory* memory = dev().getGpuMemory(&vcmd.memory()); // Save map info for unmap operation - memory->saveMapInfo(vcmd.origin(), vcmd.size(), + memory->saveMapInfo(vcmd.mapPtr(), vcmd.origin(), vcmd.size(), vcmd.mapFlags(), vcmd.isEntireMemory()); // If we have host memory, use it @@ -1394,7 +1394,7 @@ VirtualGPU::submitMapMemory(amd::MapMemoryCommand& vcmd) amd::Image* amdImage = vcmd.memory().parent()->asImage(); if ((amdImage != nullptr) && (amdImage->getMipLevels() > 1)) { 
// Save map write info in the parent object - dev().getGpuMemory(amdImage)->saveMapInfo( + dev().getGpuMemory(amdImage)->saveMapInfo(vcmd.mapPtr(), vcmd.origin(), vcmd.size(), vcmd.mapFlags(), vcmd.isEntireMemory(), vcmd.memory().asImage()); @@ -1426,22 +1426,24 @@ VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& vcmd) pal::Memory* memory = dev().getGpuMemory(&vcmd.memory()); amd::Memory* owner = memory->owner(); bool unmapMip = false; + const device::Memory::WriteMapInfo* writeMapInfo = + memory->writeMapInfo(vcmd.mapPtr()); // Check if image is a mipmap and assign a saved view amd::Image* amdImage = owner->asImage(); if ((amdImage != nullptr) && (amdImage->getMipLevels() > 1) && - (memory->writeMapInfo()->baseMip_ != nullptr)) { + (writeMapInfo->baseMip_ != nullptr)) { // Clear unmap flags from the parent image - memory->clearUnmapFlags(); + memory->clearUnmapInfo(vcmd.mapPtr()); // Assign mip level view - amdImage = memory->writeMapInfo()->baseMip_; + amdImage = writeMapInfo->baseMip_; memory = dev().getGpuMemory(amdImage); unmapMip = true; } // We used host memory if ((owner->getHostMem() != nullptr) && memory->isDirectMap()) { - if (memory->isUnmapWrite() && !owner->usesSvmPointer()) { + if (writeMapInfo->isUnmapWrite() && !owner->usesSvmPointer()) { // Target is the backing store, so sync owner->signalWrite(nullptr); memory->syncCacheFromHost(*this); @@ -1455,17 +1457,17 @@ VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& vcmd) memory->unmap(this); } else if (memory->mapMemory() != nullptr) { - if (memory->isUnmapWrite()) { + if (writeMapInfo->isUnmapWrite()) { amd::Coord3D srcOrigin(0, 0, 0); // Target is a remote resource, so copy assert(memory->mapMemory() != nullptr); if (memory->desc().buffer_) { if (!blitMgr().copyBuffer( *memory->mapMemory(), *memory, - srcOrigin, - memory->writeMapInfo()->origin_, - memory->writeMapInfo()->region_, - memory->writeMapInfo()->entire_)) { + writeMapInfo->origin_, + writeMapInfo->origin_, + 
writeMapInfo->region_, + writeMapInfo->isEntire())) { LogError("submitUnmapMemory() - copy failed"); vcmd.setStatus(CL_OUT_OF_RESOURCES); } @@ -1473,8 +1475,8 @@ VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& vcmd) else if ((vcmd.memory().getType() == CL_MEM_OBJECT_IMAGE1D_BUFFER)) { amd::Memory* bufferFromImage = nullptr; Memory* memoryBuf = memory; - amd::Coord3D origin(memory->writeMapInfo()->origin_[0]); - amd::Coord3D size(memory->writeMapInfo()->region_[0]); + amd::Coord3D origin(writeMapInfo->origin_[0]); + amd::Coord3D size(writeMapInfo->region_[0]); size_t elemSize = vcmd.memory().asImage()->getImageFormat().getElementSize(); origin.c[0] *= elemSize; @@ -1491,7 +1493,7 @@ VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& vcmd) if (!blitMgr().copyBuffer( *memory->mapMemory(), *memoryBuf, srcOrigin, origin, size, - memory->writeMapInfo()->entire_)) { + writeMapInfo->isEntire())) { LogError("submitUnmapMemory() - copy failed"); vcmd.setStatus(CL_OUT_OF_RESOURCES); } @@ -1503,9 +1505,9 @@ VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& vcmd) if (!blitMgr().copyBufferToImage( *memory->mapMemory(), *memory, srcOrigin, - memory->writeMapInfo()->origin_, - memory->writeMapInfo()->region_, - memory->writeMapInfo()->entire_)) { + writeMapInfo->origin_, + writeMapInfo->region_, + writeMapInfo->isEntire())) { LogError("submitUnmapMemory() - copy failed"); vcmd.setStatus(CL_OUT_OF_RESOURCES); } @@ -1518,7 +1520,7 @@ VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& vcmd) } // Clear unmap flags - memory->clearUnmapFlags(); + memory->clearUnmapInfo(vcmd.mapPtr()); // Release a view for a mipmap map if (unmapMip) { @@ -1629,15 +1631,14 @@ VirtualGPU::submitSvmMapMemory(amd::SvmMapMemoryCommand& vcmd) // Make sure we have memory for the command execution pal::Memory* memory = dev().getGpuMemory(vcmd.getSvmMem()); - memory->saveMapInfo(vcmd.origin(), vcmd.size(), + memory->saveMapInfo(vcmd.svmPtr(), vcmd.origin(), vcmd.size(), vcmd.mapFlags(), 
vcmd.isEntireMemory()); if (memory->mapMemory() != nullptr) { if (vcmd.mapFlags() & (CL_MAP_READ | CL_MAP_WRITE)) { - amd::Coord3D dstOrigin(0, 0, 0); assert(memory->desc().buffer_ && "SVM memory can't be an image"); if (!blitMgr().copyBuffer(*memory, *memory->mapMemory(), - vcmd.origin(), dstOrigin, vcmd.size(), vcmd.isEntireMemory())) { + vcmd.origin(), vcmd.origin(), vcmd.size(), vcmd.isEntireMemory())) { LogError("submitSVMMapMemory() - copy failed"); vcmd.setStatus(CL_MAP_FAILURE); } @@ -1660,21 +1661,24 @@ VirtualGPU::submitSvmUnmapMemory(amd::SvmUnmapMemoryCommand& vcmd) //no op for FGS supported device if (!dev().isFineGrainedSystem()) { - pal::Memory* memory = dev().getGpuMemory(vcmd.getSvmMem()); + const device::Memory::WriteMapInfo* writeMapInfo = + memory->writeMapInfo(vcmd.svmPtr()); + if (memory->mapMemory() != nullptr) { - if (memory->isUnmapWrite()) { + if (writeMapInfo->isUnmapWrite()) { amd::Coord3D srcOrigin(0, 0, 0); // Target is a remote resource, so copy assert(memory->desc().buffer_ && "SVM memory can't be an image"); - if (!blitMgr().copyBuffer(*memory->mapMemory(), *memory, srcOrigin, - memory->writeMapInfo()->origin_, memory->writeMapInfo()->region_, - memory->writeMapInfo()->entire_)) { + if (!blitMgr().copyBuffer(*memory->mapMemory(), *memory, + writeMapInfo->origin_, writeMapInfo->origin_, + writeMapInfo->region_, writeMapInfo->isEntire())) { LogError("submitSvmUnmapMemory() - copy failed"); vcmd.setStatus(CL_OUT_OF_RESOURCES); } } } + memory->clearUnmapInfo(vcmd.svmPtr()); } profilingEnd(vcmd); diff --git a/rocclr/runtime/platform/command.hpp b/rocclr/runtime/platform/command.hpp index 7d03925990..94d35ab071 100644 --- a/rocclr/runtime/platform/command.hpp +++ b/rocclr/runtime/platform/command.hpp @@ -709,10 +709,11 @@ public: class MapMemoryCommand: public OneMemoryArgCommand { private: - cl_map_flags mapFlags_; //!< Flags controlling the map. 
- bool blocking_; //!< True for blocking maps - Coord3D origin_; //!< Origin of the region to map. - Coord3D size_; //!< Size of the region to map. + cl_map_flags mapFlags_; //!< Flags controlling the map. + bool blocking_; //!< True for blocking maps + Coord3D origin_; //!< Origin of the region to map. + Coord3D size_; //!< Size of the region to map. + const void* mapPtr_; //!< Host-space pointer that the object is currently mapped at public: //! Construct a new MapMemoryCommand @@ -723,11 +724,12 @@ public: Memory& memory, cl_map_flags mapFlags, bool blocking, Coord3D origin, Coord3D size, - size_t* imgRowPitch = NULL, - size_t* imgSlicePitch = NULL) : + size_t* imgRowPitch = nullptr, + size_t* imgSlicePitch = nullptr, + void* mapPtr = nullptr) : OneMemoryArgCommand(queue, cmdType, eventWaitList, memory), mapFlags_(mapFlags), blocking_(blocking), - origin_(origin), size_(size) + origin_(origin), size_(size), mapPtr_(mapPtr) { // Sanity checks assert(size.c[0] > 0 && "invalid"); @@ -749,6 +751,9 @@ public: bool blocking() const { return blocking_; } //! Returns true if the entire memory object is mapped bool isEntireMemory() const; + //! 
Read the map pointer + const void* mapPtr() const { return mapPtr_; } + }; @@ -1398,10 +1403,11 @@ public: class SvmMapMemoryCommand : public Command { private: - Memory* svmMem_; //!< the pointer to the amd::Memory object corresponding the svm pointer mapped - Coord3D size_; //!< the map size - Coord3D origin_; //!< the origin of the mapped svm pointer shift from the beginning of svm space allocated - cl_map_flags flags_; //!< map flags + Memory* svmMem_; //!< the pointer to the amd::Memory object corresponding the svm pointer mapped + Coord3D size_; //!< the map size + Coord3D origin_; //!< the origin of the mapped svm pointer shift from the beginning of svm space allocated + cl_map_flags flags_; //!< map flags + void* svmPtr_; public: SvmMapMemoryCommand( @@ -1410,12 +1416,14 @@ public: Memory* svmMem, const size_t size, const size_t offset, - cl_map_flags flags) : - Command(queue, CL_COMMAND_SVM_MAP, eventWaitList), - svmMem_(svmMem), - size_(size), - origin_(offset), - flags_(flags) + cl_map_flags flags, + void* svmPtr) + : Command(queue, CL_COMMAND_SVM_MAP, eventWaitList) + , svmMem_(svmMem) + , size_(size) + , origin_(offset) + , flags_(flags) + , svmPtr_(svmPtr) { } @@ -1432,6 +1440,8 @@ public: Coord3D origin() const {return origin_;} + void* svmPtr() const { return svmPtr_; } + bool isEntireMemory() const; }; @@ -1441,14 +1451,18 @@ public: class SvmUnmapMemoryCommand : public Command { private: - Memory* svmMem_; //!< the pointer to the amd::Memory object corresponding the svm pointer mapped + Memory* svmMem_; //!< the pointer to the amd::Memory object corresponding the svm pointer mapped + void* svmPtr_; //!< SVM pointer + public: SvmUnmapMemoryCommand( HostQueue& queue, const EventWaitList& eventWaitList, - Memory* svmMem) : - Command(queue, CL_COMMAND_SVM_UNMAP, eventWaitList), - svmMem_(svmMem) + Memory* svmMem, + void* svmPtr) + : Command(queue, CL_COMMAND_SVM_UNMAP, eventWaitList) + , svmMem_(svmMem) + , svmPtr_(svmPtr) {} virtual void 
submit(device::VirtualDevice& device) @@ -1456,7 +1470,9 @@ public: device.submitSvmUnmapMemory(*this); } - Memory* getSvmMem() const {return svmMem_;} + Memory* getSvmMem() const { return svmMem_; } + + void* svmPtr() const { return svmPtr_; } }; /*! \brief A generic transfer memory from/to file command.