diff --git a/projects/clr/rocclr/runtime/device/cpu/cpudevice.cpp b/projects/clr/rocclr/runtime/device/cpu/cpudevice.cpp index bf39c6a7dd..d871cff32e 100644 --- a/projects/clr/rocclr/runtime/device/cpu/cpudevice.cpp +++ b/projects/clr/rocclr/runtime/device/cpu/cpudevice.cpp @@ -1123,6 +1123,7 @@ Device::allocMapTarget( amd::Memory& mem, const amd::Coord3D& origin, const amd::Coord3D& region, + uint mapFlags, size_t* rowPitch, size_t* slicePitch) { diff --git a/projects/clr/rocclr/runtime/device/cpu/cpudevice.hpp b/projects/clr/rocclr/runtime/device/cpu/cpudevice.hpp index e52db18615..03dd1a8e12 100644 --- a/projects/clr/rocclr/runtime/device/cpu/cpudevice.hpp +++ b/projects/clr/rocclr/runtime/device/cpu/cpudevice.hpp @@ -143,6 +143,7 @@ public: amd::Memory& mem, //!< Abstraction layer memory object const amd::Coord3D& origin, //!< The map location in memory const amd::Coord3D& region, //!< The map region in memory + uint mapFlags, //!< Map flags size_t* rowPitch = NULL, //!< Row pitch for the mapped memory size_t* slicePitch = NULL //!< Slice for the mapped memory ); diff --git a/projects/clr/rocclr/runtime/device/device.cpp b/projects/clr/rocclr/runtime/device/device.cpp index 1cd452828e..f152a0fd8d 100644 --- a/projects/clr/rocclr/runtime/device/device.cpp +++ b/projects/clr/rocclr/runtime/device/device.cpp @@ -572,15 +572,21 @@ Kernel::~Kernel() } void -Memory::saveWriteMapInfo( +Memory::saveMapInfo( const amd::Coord3D origin, const amd::Coord3D region, + uint mapFlags, bool entire) { - writeMapInfo_.origin_ = origin; - writeMapInfo_.region_ = region; - writeMapInfo_.entire_ = entire; - flags_ |= UnmapWrite; + if (mapFlags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION)) { + writeMapInfo_.origin_ = origin; + writeMapInfo_.region_ = region; + writeMapInfo_.entire_ = entire; + flags_ |= UnmapWrite; + } + if (mapFlags & CL_MAP_READ) { + flags_ |= UnmapRead; + } } Program::Program(amd::Device& device) diff --git a/projects/clr/rocclr/runtime/device/device.hpp 
b/projects/clr/rocclr/runtime/device/device.hpp index c427ba2cfc..2eb8027789 100644 --- a/projects/clr/rocclr/runtime/device/device.hpp +++ b/projects/clr/rocclr/runtime/device/device.hpp @@ -727,20 +727,24 @@ public: //! Saves map info for this object //! @note: It's not a thread safe operation, the app must implement //! synchronization for the multiple write maps if necessary - void saveWriteMapInfo( + void saveMapInfo( const amd::Coord3D origin, //!< Origin of the map location const amd::Coord3D region, //!< Mapped region + uint mapFlags, //!< Map flags bool entire //!< True if the enitre memory was mapped ); const WriteMapInfo* writeMapInfo() const { return &writeMapInfo_; } //! Clear memory object as mapped read only - void clearUnmapWrite() { flags_ &= ~UnmapWrite; } + void clearUnmapFlags() { flags_ &= ~(UnmapWrite | UnmapRead); } - //! Returns state of map read only flag + //! Returns state of map write flag bool isUnmapWrite() const { return (flags_ & UnmapWrite) ? true : false; } + //! Returns state of map read flag + bool isUnmapRead() const { return (flags_ & UnmapRead) ? true : false; } + //! Returns state of memory direct access flag bool isHostMemDirectAccess() const { return (flags_ & HostMemoryDirectAccess) ? 
true : false; } @@ -754,9 +758,10 @@ protected: HostMemoryDirectAccess = 0x00000001, //!< GPU has direct access to the host memory MapResourceAlloced = 0x00000002, //!< Map resource was allocated PinnedMemoryAlloced = 0x00000004, //!< An extra pinned resource was allocated - UnmapWrite = 0x00000008, //!< Memory was mapped read-only + UnmapWrite = 0x00000008, //!< Memory was mapped for write SubMemoryObject = 0x00000010, //!< Memory is sub-memory HostMemoryRegistered = 0x00000020, //!< Host memory was registered + UnmapRead = 0x00000040, //!< Memory was mapped for read }; uint flags_; //!< Memory object flags @@ -1587,6 +1592,7 @@ public: amd::Memory& mem, //!< Abstraction layer memory object const amd::Coord3D& origin, //!< The map location in memory const amd::Coord3D& region, //!< The map region in memory + uint mapFlags, //!< Map flags size_t* rowPitch = NULL, //!< Row pitch for the mapped memory size_t* slicePitch = NULL //!< Slice for the mapped memory ) = 0; diff --git a/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp b/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp index 62236747a6..4fa1248573 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp @@ -2098,6 +2098,7 @@ Device::allocMapTarget( amd::Memory& mem, const amd::Coord3D& origin, const amd::Coord3D& region, + uint mapFlags, size_t* rowPitch, size_t* slicePitch) { @@ -2109,7 +2110,7 @@ Device::allocMapTarget( } // Pass request over to memory - return memory->allocMapTarget(origin, region, rowPitch, slicePitch); + return memory->allocMapTarget(origin, region, mapFlags, rowPitch, slicePitch); } bool diff --git a/projects/clr/rocclr/runtime/device/gpu/gpudevice.hpp b/projects/clr/rocclr/runtime/device/gpu/gpudevice.hpp index 3776d290a2..e51b3dcd30 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpudevice.hpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpudevice.hpp @@ -111,6 +111,7 @@ public: amd::Memory& mem, //!< 
Abstraction layer memory object const amd::Coord3D& origin, //!< The map location in memory const amd::Coord3D& region, //!< The map region in memory + uint mapFlags, //!< Map flags size_t* rowPitch = NULL, //!< Row pitch for the mapped memory size_t* slicePitch = NULL //!< Slice for the mapped memory ) { return NULL; } @@ -437,6 +438,7 @@ public: amd::Memory& mem, //!< Abstraction layer memory object const amd::Coord3D& origin, //!< The map location in memory const amd::Coord3D& region, //!< The map region in memory + uint mapFlags, //!< Map flags size_t* rowPitch = NULL, //!< Row pitch for the mapped memory size_t* slicePitch = NULL //!< Slice for the mapped memory ); diff --git a/projects/clr/rocclr/runtime/device/gpu/gpumemory.cpp b/projects/clr/rocclr/runtime/device/gpu/gpumemory.cpp index ba059ff2c0..ba32590eb4 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpumemory.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpumemory.cpp @@ -906,6 +906,7 @@ void* Memory::allocMapTarget( const amd::Coord3D& origin, const amd::Coord3D& region, + uint mapFlags, size_t* rowPitch, size_t* slicePitch) { @@ -921,7 +922,8 @@ Memory::allocMapTarget( incIndMapCount(); // If host memory exists, use it - if (owner()->getHostMem() != NULL) { + if ((owner()->getHostMem() != NULL) && + (isCacheable() || !isHostMemDirectAccess() || !(mapFlags & CL_MAP_READ))) { mapAddress = reinterpret_cast
(owner()->getHostMem()); } // If resource is a persistent allocation, we can use it directly @@ -1226,6 +1228,7 @@ void* Image::allocMapTarget( const amd::Coord3D& origin, const amd::Coord3D& region, + uint mapFlags, size_t* rowPitch, size_t* slicePitch) { @@ -1245,7 +1248,8 @@ Image::allocMapTarget( incIndMapCount(); // If host memory exists, use it - if (owner()->getHostMem() != NULL) { + if ((owner()->getHostMem() != NULL) && + (isCacheable() || !isHostMemDirectAccess() || !(mapFlags & CL_MAP_READ))) { useRemoteResource = false; mapAddress = reinterpret_cast(owner()->getHostMem()); amd::Image* amdImage = owner()->asImage(); diff --git a/projects/clr/rocclr/runtime/device/gpu/gpumemory.hpp b/projects/clr/rocclr/runtime/device/gpu/gpumemory.hpp index a25332aeab..fbcd63d0d7 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpumemory.hpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpumemory.hpp @@ -121,6 +121,7 @@ public: virtual void* allocMapTarget( const amd::Coord3D& origin, //!< The map location in memory const amd::Coord3D& region, //!< The map region in memory + uint mapFlags, //!< Map flags size_t* rowPitch = NULL, //!< Row pitch for the mapped memory size_t* slicePitch = NULL //!< Slice for the mapped memory ); @@ -288,6 +289,7 @@ public: virtual void* allocMapTarget( const amd::Coord3D& origin, //!< The map location in memory const amd::Coord3D& region, //!< The map region in memory + uint mapFlags, //!< Map flags size_t* rowPitch = NULL, //!< Row pitch for the mapped memory size_t* slicePitch = NULL //!< Slice for the mapped memory ); diff --git a/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp b/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp index b693322eec..4e1cb76edf 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp @@ -1085,14 +1085,15 @@ VirtualGPU::submitMapMemory(amd::MapMemoryCommand& vcmd) gpu::Memory* memory = dev().getGpuMemory(&vcmd.memory()); - // 
Save write map info for unmap copy - if (vcmd.mapFlags() & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION)) { - memory->saveWriteMapInfo(vcmd.origin(), - vcmd.size(), vcmd.isEntireMemory()); - } + // Save map info for unmap operation + memory->saveMapInfo(vcmd.origin(), vcmd.size(), + vcmd.mapFlags(), vcmd.isEntireMemory()); // If we have host memory, use it - if (memory->owner()->getHostMem() != NULL) { + if ((memory->owner()->getHostMem() != NULL) && + (memory->isCacheable() || + !memory->isHostMemDirectAccess() || + !(vcmd.mapFlags() & CL_MAP_READ))) { if (!memory->isHostMemDirectAccess()) { // Make sure GPU finished operation before // synchronization with the backing store @@ -1177,7 +1178,10 @@ VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& vcmd) amd::Memory* owner = memory->owner(); // We used host memory - if (owner->getHostMem() != NULL) { + if ((owner->getHostMem() != NULL) && + (memory->isCacheable() || + !memory->isHostMemDirectAccess() || + !memory->isUnmapRead())) { if (memory->isUnmapWrite()) { // Target is the backing store, so sync owner->signalWrite(NULL); @@ -1254,8 +1258,8 @@ VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& vcmd) vcmd.setStatus(CL_INVALID_VALUE); } - // Clear read only flag - memory->clearUnmapWrite(); + // Clear unmap flags + memory->clearUnmapFlags(); profilingEnd(vcmd); } @@ -1357,31 +1361,20 @@ VirtualGPU::submitSvmMapMemory(amd::SvmMapMemoryCommand& vcmd) profilingBegin(vcmd, true); - //check if the ptr is in the svm space - amd::Memory* svmMem = vcmd.getSvmMem(); - if (NULL == svmMem) { - LogWarning("wrong svm address "); - vcmd.setStatus(CL_INVALID_VALUE); - return; - } - // Make sure we have memory for the command execution - gpu::Memory* memory = dev().getGpuMemory(svmMem); + gpu::Memory* memory = dev().getGpuMemory(vcmd.getSvmMem()); - if (vcmd.mapFlags() & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION)) { - memory->saveWriteMapInfo(vcmd.origin(), vcmd.size(), vcmd.isEntireMemory()); - } + 
memory->saveMapInfo(vcmd.origin(), vcmd.size(), + vcmd.mapFlags(), vcmd.isEntireMemory()); if (memory->mapMemory() != NULL) { if (vcmd.mapFlags() & (CL_MAP_READ | CL_MAP_WRITE)) { amd::Coord3D dstOrigin(0, 0, 0); - if (memory->cal()->buffer_) { - if (!blitMgr().copyBuffer(*memory, - *memory->mapMemory(), vcmd.origin(), dstOrigin, - vcmd.size(), vcmd.isEntireMemory())) { - LogError("submitSVMMapMemory() - copy failed"); - vcmd.setStatus(CL_MAP_FAILURE); - } + assert(memory->cal()->buffer_ && "SVM memory can't be an image"); + if (!blitMgr().copyBuffer(*memory, *memory->mapMemory(), + vcmd.origin(), dstOrigin, vcmd.size(), vcmd.isEntireMemory())) { + LogError("submitSVMMapMemory() - copy failed"); + vcmd.setStatus(CL_MAP_FAILURE); } } } @@ -1399,30 +1392,18 @@ VirtualGPU::submitSvmUnmapMemory(amd::SvmUnmapMemoryCommand& vcmd) amd::ScopedLock lock(execution()); profilingBegin(vcmd, true); - amd::Memory* svmMem = vcmd.getSvmMem(); - if (NULL == svmMem) { - LogWarning("wrong svm address "); - vcmd.setStatus(CL_INVALID_VALUE); - return; - } - - gpu::Memory* memory = dev().getGpuMemory(svmMem); + gpu::Memory* memory = dev().getGpuMemory(vcmd.getSvmMem()); if (memory->mapMemory() != NULL) { if (memory->isUnmapWrite()) { amd::Coord3D srcOrigin(0, 0, 0); // Target is a remote resource, so copy - assert(memory->mapMemory() != NULL); - if (memory->cal()->buffer_) { - if (!blitMgr().copyBuffer( - *memory->mapMemory(), *memory, - srcOrigin, - memory->writeMapInfo()->origin_, - memory->writeMapInfo()->region_, - memory->writeMapInfo()->entire_)) { - LogError("submitUnmapMemory() - copy failed"); - vcmd.setStatus(CL_OUT_OF_RESOURCES); - } + assert(memory->cal()->buffer_ && "SVM memory can't be an image"); + if (!blitMgr().copyBuffer(*memory->mapMemory(), *memory, srcOrigin, + memory->writeMapInfo()->origin_, memory->writeMapInfo()->region_, + memory->writeMapInfo()->entire_)) { + LogError("submitSvmUnmapMemory() - copy failed"); + vcmd.setStatus(CL_OUT_OF_RESOURCES); } } } diff 
--git a/projects/clr/rocclr/runtime/device/hsa/hsadevice.cpp b/projects/clr/rocclr/runtime/device/hsa/hsadevice.cpp index 4e0aa1b719..fc7443bc85 100644 --- a/projects/clr/rocclr/runtime/device/hsa/hsadevice.cpp +++ b/projects/clr/rocclr/runtime/device/hsa/hsadevice.cpp @@ -685,6 +685,7 @@ Device::allocMapTarget( amd::Memory& mem, const amd::Coord3D& origin, const amd::Coord3D& region, + uint mapFlags, size_t* rowPitch, size_t* slicePitch) { @@ -696,7 +697,7 @@ Device::allocMapTarget( } // Pass request over to memory - return memory->allocMapTarget(origin, region, rowPitch, slicePitch); + return memory->allocMapTarget(origin, region, mapFlags, rowPitch, slicePitch); } bool diff --git a/projects/clr/rocclr/runtime/device/hsa/hsadevice.hpp b/projects/clr/rocclr/runtime/device/hsa/hsadevice.hpp index e50206bbe2..5cd55add6b 100644 --- a/projects/clr/rocclr/runtime/device/hsa/hsadevice.hpp +++ b/projects/clr/rocclr/runtime/device/hsa/hsadevice.hpp @@ -191,6 +191,7 @@ public: amd::Memory& mem, //!< Abstraction layer memory object const amd::Coord3D& origin, //!< The map location in memory const amd::Coord3D& region, //!< The map region in memory + uint mapFlags, //!< Map flags size_t* rowPitch = NULL, //!< Row pitch for the mapped memory size_t* slicePitch = NULL //!< Slice for the mapped memory ) { @@ -323,6 +324,7 @@ public: amd::Memory &mem, //!< Abstraction layer memory object const amd::Coord3D &origin, //!< The map location in memory const amd::Coord3D &region, //!< The map region in memory + uint mapFlags, //!< Map flags size_t *rowPitch = NULL, //!< Row pitch for the mapped memory size_t *slicePitch = NULL //!< Slice for the mapped memory ); diff --git a/projects/clr/rocclr/runtime/device/hsa/hsamemory.cpp b/projects/clr/rocclr/runtime/device/hsa/hsamemory.cpp index f87e31371e..08b0b6d5b0 100644 --- a/projects/clr/rocclr/runtime/device/hsa/hsamemory.cpp +++ b/projects/clr/rocclr/runtime/device/hsa/hsamemory.cpp @@ -79,6 +79,7 @@ Memory::freeMapMemory() void * 
Memory::allocMapTarget(const amd::Coord3D &origin, const amd::Coord3D &region, + uint mapFlags, size_t *rowPitch, size_t *slicePitch) { @@ -146,7 +147,7 @@ Memory::cpuMap( { // Create the map target. void * mapTarget = - allocMapTarget(amd::Coord3D(0), amd::Coord3D(0), rowPitch, slicePitch); + allocMapTarget(amd::Coord3D(0), amd::Coord3D(0), 0, rowPitch, slicePitch); // Sync to map target if no direct access. if (!isHostMemDirectAccess()) { @@ -862,6 +863,7 @@ Image::createView(Image &parent) void* Image::allocMapTarget(const amd::Coord3D& origin, const amd::Coord3D& region, + uint mapFlags, size_t* rowPitch, size_t* slicePitch) { diff --git a/projects/clr/rocclr/runtime/device/hsa/hsamemory.hpp b/projects/clr/rocclr/runtime/device/hsa/hsamemory.hpp index 009ebe4609..3ebdb3e7cc 100644 --- a/projects/clr/rocclr/runtime/device/hsa/hsamemory.hpp +++ b/projects/clr/rocclr/runtime/device/hsa/hsamemory.hpp @@ -35,6 +35,7 @@ class Memory : public device::Memory { // of an indirect map for a given memory object virtual void *allocMapTarget(const amd::Coord3D &origin, const amd::Coord3D &region, + uint mapFlags, size_t *rowPitch, size_t *slicePitch); @@ -168,6 +169,7 @@ public: //! of an indirect map for a given memory object virtual void* allocMapTarget(const amd::Coord3D& origin, const amd::Coord3D& region, + uint mapFlags, size_t* rowPitch, size_t* slicePitch); diff --git a/projects/clr/rocclr/runtime/device/hsa/hsavirtual.cpp b/projects/clr/rocclr/runtime/device/hsa/hsavirtual.cpp index 65d31516d9..072cead536 100644 --- a/projects/clr/rocclr/runtime/device/hsa/hsavirtual.cpp +++ b/projects/clr/rocclr/runtime/device/hsa/hsavirtual.cpp @@ -589,9 +589,8 @@ void VirtualGPU::submitMapMemory(amd::MapMemoryCommand &cmd) // Save map write requirement. 
if (mapFlag & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION)) { - devMemory->saveWriteMapInfo(cmd.origin(), - cmd.size(), - cmd.isEntireMemory()); + devMemory->saveMapInfo(cmd.origin(), cmd.size(), + mapFlag, cmd.isEntireMemory()); } // Sync to the map target. @@ -686,7 +685,7 @@ void VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand &cmd) } } - devMemory->clearUnmapWrite(); + devMemory->clearUnmapFlags(); cmd.memory().signalWrite(&dev()); }