diff --git a/rocclr/runtime/device/gpu/gpudevice.cpp b/rocclr/runtime/device/gpu/gpudevice.cpp index af1a94fbae..a5cd3f290b 100644 --- a/rocclr/runtime/device/gpu/gpudevice.cpp +++ b/rocclr/runtime/device/gpu/gpudevice.cpp @@ -1904,7 +1904,7 @@ Device::createSampler(const amd::Sampler& owner, device::Sampler** sampler) cons *sampler = NULL; if (settings().hsail_ || (settings().oclVersion_ >= OpenCL20)) { Sampler* gpuSampler = new Sampler(*this); - if ((NULL == gpuSampler) || !gpuSampler->create(owner.state())) { + if ((NULL == gpuSampler) || !gpuSampler->create(owner)) { delete gpuSampler; return false; } @@ -2449,7 +2449,9 @@ Device::destroyScratchBuffers() } void -Device::fillHwSampler(uint32_t state, void* hwState, uint32_t hwStateSize) const +Device::fillHwSampler( + uint32_t state, void* hwState, uint32_t hwStateSize, + uint32_t mipFilter, float minLod, float maxLod) const { // All GSL sampler's parameters are in floats uint32_t gslAddress = GSL_CLAMP_TO_BORDER; @@ -2483,8 +2485,25 @@ Device::fillHwSampler(uint32_t state, void* hwState, uint32_t hwStateSize) const gslMagFilter = GSL_MAG_LINEAR; } + if (mipFilter == CL_FILTER_NEAREST) { + if (gslMinFilter == GSL_MIN_NEAREST) { + gslMinFilter = GSL_MIN_NEAREST_MIPMAP_NEAREST; + } + else { + gslMinFilter = GSL_MIN_LINEAR_MIPMAP_NEAREST; + } + } + else if (mipFilter == CL_FILTER_LINEAR) { + if (gslMinFilter == GSL_MIN_NEAREST) { + gslMinFilter = GSL_MIN_NEAREST_MIPMAP_LINEAR; + } + else { + gslMinFilter = GSL_MIN_LINEAR_MIPMAP_LINEAR; + } + } + fillSamplerHwState(unnorm, gslMinFilter, gslMagFilter, - gslAddress, hwState, hwStateSize); + gslAddress, minLod, maxLod, hwState, hwStateSize); } void* @@ -2564,8 +2583,7 @@ Device::SrdManager::~SrdManager() } bool -Sampler::create( - uint32_t oclSamplerState) +Sampler::create(uint32_t oclSamplerState) { hwSrd_ = dev_.srds().allocSrdSlot(&hwState_); if (0 == hwSrd_) { @@ -2575,6 +2593,18 @@ Sampler::create( return true; } +bool +Sampler::create(const amd::Sampler& owner) +{ + hwSrd_ = dev_.srds().allocSrdSlot(&hwState_); + if (0 == hwSrd_) { + return false; + } + dev_.fillHwSampler(owner.state(), hwState_, HsaSamplerObjectSize, + owner.mipFilter(), owner.minLod(), owner.maxLod()); + return true; +} + Sampler::~Sampler() { dev_.srds().freeSrdSlot(hwSrd_); diff --git a/rocclr/runtime/device/gpu/gpudevice.hpp b/rocclr/runtime/device/gpu/gpudevice.hpp index 939212c4ce..f8d6ebacf8 100644 --- a/rocclr/runtime/device/gpu/gpudevice.hpp +++ b/rocclr/runtime/device/gpu/gpudevice.hpp @@ -140,6 +140,10 @@ class VirtualDevice; class PrintfDbg; class ThreadTrace; +#ifndef CL_FILTER_NONE +#define CL_FILTER_NONE 0x1142 +#endif + class Sampler : public device::Sampler { public: @@ -154,6 +158,11 @@ public: uint32_t oclSamplerState //!< OCL sampler state ); + //! Creates a device sampler from the OCL sampler state + bool create( + const amd::Sampler& owner //!< AMD sampler object + ); + const void* hwState() const { return hwState_; } private: @@ -518,9 +527,12 @@ public: //! Set GSL sampler to the specified state void fillHwSampler( - uint32_t state, //!< Sampler's OpenCL state - void* hwState, //!< Sampler's HW state - uint32_t hwStateSize //!< Size of sampler's HW state + uint32_t state, //!< Sampler's OpenCL state + void* hwState, //!< Sampler's HW state + uint32_t hwStateSize, //!< Size of sampler's HW state + uint32_t mipFilter = CL_FILTER_NONE, //!< Mip filter + float minLod = 0.f, //!< Min level of detail + float maxLod = CL_MAXFLOAT //!< Max level of detail ) const; //! host memory alloc diff --git a/rocclr/runtime/device/gpu/gslbe/src/rt/GSLDevice.cpp b/rocclr/runtime/device/gpu/gslbe/src/rt/GSLDevice.cpp index 278b92decd..40f9120929 100644 --- a/rocclr/runtime/device/gpu/gslbe/src/rt/GSLDevice.cpp +++ b/rocclr/runtime/device/gpu/gslbe/src/rt/GSLDevice.cpp @@ -1726,7 +1726,8 @@ CALGSLDevice::fillImageHwState(gslMemObject mem, void* hwState, uint32 hwStateSi } void -CALGSLDevice::fillSamplerHwState(bool unnorm, uint32 min, uint32 mag, uint32 addr, void* hwState, uint32 hwStateSize) const +CALGSLDevice::fillSamplerHwState(bool unnorm, uint32 min, uint32 mag, uint32 addr, + float minLod, float maxLod, void* hwState, uint32 hwStateSize) const { amd::ScopedLock k(gslDeviceOps()); m_textureSampler->setUnnormalizedMode(m_cs, unnorm); @@ -1735,6 +1736,8 @@ CALGSLDevice::fillSamplerHwState(bool unnorm, uint32 min, uint32 mag, uint32 add m_textureSampler->setWrap(m_cs, GSL_TEXTURE_WRAP_S, static_cast(addr)); m_textureSampler->setWrap(m_cs, GSL_TEXTURE_WRAP_T, static_cast(addr)); m_textureSampler->setWrap(m_cs, GSL_TEXTURE_WRAP_R, static_cast(addr)); + m_textureSampler->setMinLOD(m_cs, static_cast(minLod)); + m_textureSampler->setMaxLOD(m_cs, static_cast(maxLod)); m_textureSampler->getSamplerSrd(m_cs, hwState, hwStateSize); } diff --git a/rocclr/runtime/device/gpu/gslbe/src/rt/GSLDevice.h b/rocclr/runtime/device/gpu/gslbe/src/rt/GSLDevice.h index cb342a4f9e..95d5ec54ac 100644 --- a/rocclr/runtime/device/gpu/gslbe/src/rt/GSLDevice.h +++ b/rocclr/runtime/device/gpu/gslbe/src/rt/GSLDevice.h @@ -126,7 +126,8 @@ public: void fillImageHwState(gslMemObject mem, void* hwState, uint32 hwStateSize) const; - void fillSamplerHwState(bool unnorm, uint32 min, uint32 mag, uint32 addr, void* hwState, uint32 hwStateSize) const; + void fillSamplerHwState(bool unnorm, uint32 min, uint32 mag, uint32 addr, + float minLod, float maxLod, void* hwState, uint32 hwStateSize) const; gslSamplerObject txSampler() const { return m_textureSampler; } diff --git a/rocclr/runtime/platform/sampler.hpp b/rocclr/runtime/platform/sampler.hpp index a2a65261f4..10f9c900da 100644 --- a/rocclr/runtime/platform/sampler.hpp +++ b/rocclr/runtime/platform/sampler.hpp @@ -45,21 +45,30 @@ public: private: Context& context_; //!< OpenCL context associated with this sampler uint32_t state_; //!< Sampler state + uint mipFilter_; //!< mip filter + float minLod_; //!< min level of detail + float maxLod_; //!< max level of detail DeviceSamplers deviceSamplers_; //!< Container for the device samplers public: Sampler( Context& context, //!< OpenCL context - bool norm_coords, //!< normalized coordinates - uint addr_mode, //!< adressing mode - uint filter_mode //!< filter mode + bool normCoords, //!< normalized coordinates + uint addrMode, //!< adressing mode + uint filterMode, //!< filter mode + uint mipFilterMode, //!< mip filter mode + float minLod, //!< min level of detail + float maxLod //!< max level of detail ) : context_(context) + , mipFilter_(mipFilterMode) + , minLod_(minLod) + , maxLod_(maxLod) { // Packs the sampler state into uint32_t for kernel execution state_ = 0; // Set normalized state - if (norm_coords) { + if (normCoords) { state_ |= StateNormalizedCoordsTrue; } else { @@ -67,7 +76,7 @@ public: } // Program the sampler filter mode - if (filter_mode == CL_FILTER_LINEAR) { + if (filterMode == CL_FILTER_LINEAR) { state_ |= StateFilterLinear; } else { @@ -75,7 +84,7 @@ public: } // Program the sampler address mode - switch (addr_mode) { + switch (addrMode) { case CL_ADDRESS_CLAMP_TO_EDGE: state_ |= StateAddressClampToEdge; break; @@ -98,9 +107,8 @@ public: virtual ~Sampler() { - for (DeviceSamplers::const_iterator it = deviceSamplers_.begin(); - it != deviceSamplers_.end(); ++it) { - delete it->second; + for (const auto& it : deviceSamplers_) { + delete it.second; } } @@ -119,7 +127,7 @@ public: device::Sampler* getDeviceSampler(const Device& dev) const { - DeviceSamplers::const_iterator it = deviceSamplers_.find(&dev); + auto it = deviceSamplers_.find(&dev); if (it != deviceSamplers_.end()) { return it->second; } @@ -129,6 +137,10 @@ public: //! Accessor functions Context& context() const { return context_; } uint32_t state() const { return state_; } + uint mipFilter() const { return mipFilter_; } + float minLod() const { return minLod_; } + float maxLod() const { return maxLod_; } + bool normalizedCoords() const { return (state_ & StateNormalizedCoordsTrue) ? true : false; @@ -166,6 +178,7 @@ public: return ((state_ & StateFilterMask) == StateFilterNearest) ? CL_FILTER_NEAREST : CL_FILTER_LINEAR; } + //! RTTI internal implementation virtual ObjectType objectType() const { return ObjectTypeSampler; } };