diff --git a/rocclr/device/blit.cpp b/rocclr/device/blit.cpp index 107ca57b79..4bd854219f 100644 --- a/rocclr/device/blit.cpp +++ b/rocclr/device/blit.cpp @@ -763,8 +763,8 @@ bool HostBlitManager::FillBufferInfo::PackInfo(const device::Memory& memory, siz pattern, pattern_size); return false; } + fill_info.pattern_expanded_ = true; } - fill_info.pattern_aligned_ = true; fill_info.fill_size_ = aligned_size; packed_info.push_back(fill_info); } diff --git a/rocclr/device/blit.hpp b/rocclr/device/blit.hpp index 353af4b60c..d9ac765550 100644 --- a/rocclr/device/blit.hpp +++ b/rocclr/device/blit.hpp @@ -354,7 +354,7 @@ class HostBlitManager : public device::BlitManager { // Packed Fill Buffer class FillBufferInfo { public: - FillBufferInfo(): fill_size_(0), expanded_pattern_(0), pattern_aligned_(false) {} + FillBufferInfo(): fill_size_(0), expanded_pattern_(0), pattern_expanded_(false) {} static bool PackInfo(const device::Memory& memory, size_t fill_size, size_t fill_origin, const void* pattern, size_t pattern_size, @@ -371,13 +371,13 @@ class HostBlitManager : public device::BlitManager { void clearInfo () { fill_size_ = 0; expanded_pattern_ = 0; - pattern_aligned_ = false; + pattern_expanded_ = false; } public: size_t fill_size_; // Fill size for this command uint64_t expanded_pattern_; // Pattern for this command - bool pattern_aligned_; // Boolean to check if pattern is expanded + bool pattern_expanded_; // Boolean to check if pattern is expanded }; diff --git a/rocclr/device/rocm/rocblit.cpp b/rocclr/device/rocm/rocblit.cpp index 5cf115665f..6367a6053f 100644 --- a/rocclr/device/rocm/rocblit.cpp +++ b/rocclr/device/rocm/rocblit.cpp @@ -1966,18 +1966,17 @@ bool KernelBlitManager::fillBuffer(device::Memory& memory, const void* pattern, FillBufferInfo::PackInfo(memory, size[0], origin[0], pattern, patternSize, packed_vector); size_t overall_offset = origin[0]; - uint fillType = FillBufferAligned; for (auto& packed_obj: packed_vector) { + uint fillType = FillBufferAligned; + size_t globalWorkOffset[3] = {0, 0, 0}; + size_t globalWorkSize = amd::alignUp(packed_obj.fill_size_, 256); + size_t localWorkSize = 256; - uint32_t kpattern_size32 = (packed_obj.pattern_aligned_) ? sizeof(size_t) : patternSize; + uint32_t kpattern_size32 = (packed_obj.pattern_expanded_) ? sizeof(size_t) : patternSize; size_t kfill_size = packed_obj.fill_size_/kpattern_size32; size_t koffset = overall_offset; overall_offset += packed_obj.fill_size_; - size_t globalWorkOffset[3] = {0, 0, 0}; - size_t globalWorkSize = amd::alignUp(kfill_size, 256); - size_t localWorkSize = 256; - uint32_t alignment = (kpattern_size32 & 0x7) == 0 ? sizeof(uint64_t) : (kpattern_size32 & 0x3) == 0 ? @@ -2019,7 +2018,7 @@ bool KernelBlitManager::fillBuffer(device::Memory& memory, const void* pattern, auto constBuf = reinterpret_cast
(constantBuffer_->getHostMem()) + constBufOffset; // If pattern has been expanded, use the expanded pattern, otherwise use the default pattern. - if (packed_obj.pattern_aligned_) { + if (packed_obj.pattern_expanded_) { memcpy(constBuf, &packed_obj.expanded_pattern_, kpattern_size32); } else { memcpy(constBuf, pattern, kpattern_size32); @@ -2028,8 +2027,8 @@ bool KernelBlitManager::fillBuffer(device::Memory& memory, const void* pattern, mem = as_cl(gpuCB->owner()); setArgument(kernels_[fillType], 4, sizeof(cl_mem), &mem, constBufOffset); - kpattern_size32 /= alignment; koffset /= alignment; + kpattern_size32 /= alignment; setArgument(kernels_[fillType], 5, sizeof(uint32_t), &kpattern_size32); setArgument(kernels_[fillType], 6, sizeof(koffset), &koffset);