Files
rocm-systems/projects/clr/rocclr/device/blit.cpp
T

787 строки
26 KiB
C++
Исходник Обычный вид История

/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc.
2020-02-04 09:26:14 -08:00
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
2014-07-04 16:17:05 -04:00
#include "platform/commandqueue.hpp"
#include "device/device.hpp"
#include "device/blit.hpp"
#include "utils/debug.hpp"
#include <cmath>
2014-07-04 16:17:05 -04:00
namespace device {
HostBlitManager::HostBlitManager(VirtualDevice& vDev, Setup setup)
: BlitManager(setup), vDev_(vDev), dev_(vDev.device()) {}
2014-07-04 16:17:05 -04:00
bool HostBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
const amd::Coord3D& origin, const amd::Coord3D& size,
bool entire) const {
// Map the device memory to CPU visible
void* src = srcMemory.cpuMap(vDev_, Memory::CpuReadOnly);
if (NULL == src) {
LogError("Couldn't map device memory for host read");
return false;
}
2014-07-04 16:17:05 -04:00
// Copy memory
amd::Os::fastMemcpy(dstHost, reinterpret_cast<const_address>(src) + origin[0], size[0]);
2014-07-04 16:17:05 -04:00
// Unmap device memory
srcMemory.cpuUnmap(vDev_);
2014-07-04 16:17:05 -04:00
return true;
2014-07-04 16:17:05 -04:00
}
bool HostBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,
const amd::BufferRect& bufRect,
const amd::BufferRect& hostRect, const amd::Coord3D& size,
bool entire) const {
// Map source memory
void* src = srcMemory.cpuMap(vDev_, Memory::CpuReadOnly);
if (src == NULL) {
LogError("Couldn't map source memory");
return false;
}
2014-07-04 16:17:05 -04:00
size_t srcOffset;
size_t dstOffset;
2014-07-04 16:17:05 -04:00
for (size_t z = 0; z < size[2]; ++z) {
for (size_t y = 0; y < size[1]; ++y) {
srcOffset = bufRect.offset(0, y, z);
dstOffset = hostRect.offset(0, y, z);
2014-07-04 16:17:05 -04:00
// Copy memory line by line
amd::Os::fastMemcpy((reinterpret_cast<address>(dstHost) + dstOffset),
(reinterpret_cast<const_address>(src) + srcOffset), size[0]);
2014-07-04 16:17:05 -04:00
}
}
2014-07-04 16:17:05 -04:00
// Unmap source memory
srcMemory.cpuUnmap(vDev_);
2014-07-04 16:17:05 -04:00
return true;
}
2014-07-04 16:17:05 -04:00
bool HostBlitManager::readImage(device::Memory& srcMemory, void* dstHost,
const amd::Coord3D& origin, const amd::Coord3D& size,
size_t rowPitch, size_t slicePitch, bool entire) const {
size_t startLayer = origin[2];
size_t numLayers = size[2];
if (srcMemory.owner()->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
startLayer = origin[1];
numLayers = size[1];
}
// rowPitch and slicePitch in bytes
size_t srcRowPitch;
size_t srcSlicePitch;
// Get physical GPU memmory
void* src = srcMemory.cpuMap(vDev_, Memory::CpuReadOnly, startLayer, numLayers, &srcRowPitch,
&srcSlicePitch);
if (NULL == src) {
LogError("Couldn't map GPU memory for host read");
return false;
}
size_t elementSize = srcMemory.owner()->asImage()->getImageFormat().getElementSize();
size_t srcOffsBase = origin[0] * elementSize;
size_t copySize = size[0] * elementSize;
size_t srcOffs;
size_t dstOffs = 0;
// Make sure we use the right pitch if it's not specified
if (rowPitch == 0) {
rowPitch = size[0] * elementSize;
}
// Make sure we use the right slice if it's not specified
if (slicePitch == 0) {
slicePitch = size[0] * size[1] * elementSize;
}
// Adjust destination offset with Y dimension
srcOffsBase += srcRowPitch * origin[1];
// Adjust the destination offset with Z dimension
srcOffsBase += srcSlicePitch * origin[2];
// Copy memory line by line
for (size_t slice = 0; slice < size[2]; ++slice) {
srcOffs = srcOffsBase + slice * srcSlicePitch;
dstOffs = slice * slicePitch;
2014-07-04 16:17:05 -04:00
// Copy memory line by line
for (size_t row = 0; row < size[1]; ++row) {
// Copy memory
amd::Os::fastMemcpy((reinterpret_cast<address>(dstHost) + dstOffs),
(reinterpret_cast<const_address>(src) + srcOffs), copySize);
2014-07-04 16:17:05 -04:00
srcOffs += srcRowPitch;
dstOffs += rowPitch;
2014-07-04 16:17:05 -04:00
}
}
2014-07-04 16:17:05 -04:00
// Unmap the device memory
srcMemory.cpuUnmap(vDev_);
2014-07-04 16:17:05 -04:00
return true;
2014-07-04 16:17:05 -04:00
}
bool HostBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemory,
const amd::Coord3D& origin, const amd::Coord3D& size,
bool entire) const {
uint flags = 0;
if (entire) {
flags = Memory::CpuWriteOnly;
}
2014-07-04 16:17:05 -04:00
// Map the device memory to CPU visible
void* dst = dstMemory.cpuMap(vDev_, flags);
if (NULL == dst) {
LogError("Couldn't map GPU memory for host write");
return false;
}
2014-07-04 16:17:05 -04:00
// Copy memory
amd::Os::fastMemcpy(reinterpret_cast<address>(dst) + origin[0], srcHost, size[0]);
2014-07-04 16:17:05 -04:00
// Unmap the device memory
dstMemory.cpuUnmap(vDev_);
2014-07-04 16:17:05 -04:00
return true;
2014-07-04 16:17:05 -04:00
}
bool HostBlitManager::writeBufferRect(const void* srcHost, device::Memory& dstMemory,
const amd::BufferRect& hostRect,
const amd::BufferRect& bufRect, const amd::Coord3D& size,
bool entire) const {
// Map destination memory
void* dst = dstMemory.cpuMap(vDev_, (entire) ? Memory::CpuWriteOnly : 0);
if (dst == NULL) {
LogError("Couldn't map destination memory");
return false;
}
2014-07-04 16:17:05 -04:00
size_t srcOffset;
size_t dstOffset;
2014-07-04 16:17:05 -04:00
for (size_t z = 0; z < size[2]; ++z) {
for (size_t y = 0; y < size[1]; ++y) {
srcOffset = hostRect.offset(0, y, z);
dstOffset = bufRect.offset(0, y, z);
2014-07-04 16:17:05 -04:00
// Copy memory line by line
amd::Os::fastMemcpy((reinterpret_cast<address>(dst) + dstOffset),
(reinterpret_cast<const_address>(srcHost) + srcOffset), size[0]);
2014-07-04 16:17:05 -04:00
}
}
2014-07-04 16:17:05 -04:00
// Unmap destination memory
dstMemory.cpuUnmap(vDev_);
2014-07-04 16:17:05 -04:00
return true;
}
2014-07-04 16:17:05 -04:00
bool HostBlitManager::writeImage(const void* srcHost, device::Memory& dstMemory,
const amd::Coord3D& origin, const amd::Coord3D& size,
size_t rowPitch, size_t slicePitch, bool entire) const {
uint flags = 0;
if (entire) {
flags = Memory::CpuWriteOnly;
}
size_t startLayer = origin[2];
size_t numLayers = size[2];
if (dstMemory.owner()->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
startLayer = origin[1];
numLayers = size[1];
}
// rowPitch and slicePitch in bytes
size_t dstRowPitch;
size_t dstSlicePitch;
// Map the device memory to CPU visible
void* dst = dstMemory.cpuMap(vDev_, flags, startLayer, numLayers, &dstRowPitch, &dstSlicePitch);
if (NULL == dst) {
LogError("Couldn't map GPU memory for host write");
return false;
}
size_t elementSize = dstMemory.owner()->asImage()->getImageFormat().getElementSize();
size_t srcOffs = 0;
size_t copySize = size[0] * elementSize;
size_t dstOffsBase = origin[0] * elementSize;
size_t dstOffs;
// Make sure we use the right pitch if it's not specified
if (rowPitch == 0) {
rowPitch = size[0] * elementSize;
}
// Make sure we use the right slice if it's not specified
if (slicePitch == 0) {
slicePitch = size[0] * size[1] * elementSize;
}
// Adjust the destination offset with Y dimension
dstOffsBase += dstRowPitch * origin[1];
// Adjust the destination offset with Z dimension
dstOffsBase += dstSlicePitch * origin[2];
// Copy memory slice by slice
for (size_t slice = 0; slice < size[2]; ++slice) {
dstOffs = dstOffsBase + slice * dstSlicePitch;
srcOffs = slice * slicePitch;
2014-07-04 16:17:05 -04:00
// Copy memory line by line
for (size_t row = 0; row < size[1]; ++row) {
// Copy memory
amd::Os::fastMemcpy((reinterpret_cast<address>(dst) + dstOffs),
(reinterpret_cast<const_address>(srcHost) + srcOffs), copySize);
2014-07-04 16:17:05 -04:00
dstOffs += dstRowPitch;
srcOffs += rowPitch;
2014-07-04 16:17:05 -04:00
}
}
2014-07-04 16:17:05 -04:00
// Unmap the device memory
dstMemory.cpuUnmap(vDev_);
2014-07-04 16:17:05 -04:00
return true;
2014-07-04 16:17:05 -04:00
}
bool HostBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& dstMemory,
const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin,
const amd::Coord3D& size, bool entire) const {
// Map source memory
void* src = srcMemory.cpuMap(vDev_,
// Overlap detection
(&srcMemory == &dstMemory) ? 0 : Memory::CpuReadOnly);
if (src == NULL) {
LogError("Couldn't map source memory");
return false;
}
// Map destination memory
void* dst = dstMemory.cpuMap(vDev_, (entire) ? Memory::CpuWriteOnly : 0);
if (dst == NULL) {
LogError("Couldn't map destination memory");
return false;
}
// Straight forward buffer copy
amd::Os::fastMemcpy((reinterpret_cast<address>(dst) + dstOrigin[0]),
(reinterpret_cast<const_address>(src) + srcOrigin[0]), size[0]);
// Unmap source and destination memory
dstMemory.cpuUnmap(vDev_);
srcMemory.cpuUnmap(vDev_);
return true;
2014-07-04 16:17:05 -04:00
}
bool HostBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory& dstMemory,
const amd::BufferRect& srcRect, const amd::BufferRect& dstRect,
const amd::Coord3D& size, bool entire) const {
// Map source memory
void* src = srcMemory.cpuMap(vDev_,
// Overlap detection
(&srcMemory == &dstMemory) ? 0 : Memory::CpuReadOnly);
if (src == NULL) {
LogError("Couldn't map source memory");
return false;
}
// Map destination memory
void* dst = dstMemory.cpuMap(vDev_, (entire) ? Memory::CpuWriteOnly : 0);
if (dst == NULL) {
LogError("Couldn't map destination memory");
return false;
}
for (size_t z = 0; z < size[2]; ++z) {
for (size_t y = 0; y < size[1]; ++y) {
size_t srcOffset = srcRect.offset(0, y, z);
size_t dstOffset = dstRect.offset(0, y, z);
// Copy memory line by line
amd::Os::fastMemcpy((reinterpret_cast<address>(dst) + dstOffset),
(reinterpret_cast<const_address>(src) + srcOffset), size[0]);
}
}
// Unmap source and destination memory
dstMemory.cpuUnmap(vDev_);
srcMemory.cpuUnmap(vDev_);
return true;
2014-07-04 16:17:05 -04:00
}
bool HostBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Memory& dstMemory,
const amd::Coord3D& srcOrigin,
const amd::Coord3D& dstOrigin, const amd::Coord3D& size,
bool entire, size_t rowPitch, size_t slicePitch) const {
size_t startLayer = srcOrigin[2];
size_t numLayers = size[2];
if (srcMemory.owner()->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
startLayer = srcOrigin[1];
numLayers = size[1];
}
// rowPitch and slicePitch in bytes
size_t srcRowPitch;
size_t srcSlicePitch;
// Map source memory
void* src = srcMemory.cpuMap(vDev_, Memory::CpuReadOnly, startLayer, numLayers, &srcRowPitch,
&srcSlicePitch);
if (src == NULL) {
LogError("Couldn't map source memory");
return false;
}
size_t elementSize = srcMemory.owner()->asImage()->getImageFormat().getElementSize();
// Map destination memory
void* dst = dstMemory.cpuMap(vDev_, (entire) ? Memory::CpuWriteOnly : 0);
if (dst == NULL) {
LogError("Couldn't map destination memory");
return false;
}
size_t srcOffs = srcOrigin[0];
size_t dstOffs = dstOrigin[0];
size_t srcOffsOrg;
size_t copySize = size[0];
// Calculate the offset in bytes
srcOffs *= elementSize;
copySize *= elementSize;
// Adjust source offset with Y and Z dimensions
srcOffs += srcRowPitch * srcOrigin[1];
srcOffs += srcSlicePitch * srcOrigin[2];
srcOffsOrg = srcOffs;
// Copy memory slice by slice
for (size_t slice = 0; slice < size[2]; ++slice) {
srcOffs = srcOffsOrg + slice * srcSlicePitch;
2014-07-04 16:17:05 -04:00
// Copy memory line by line
for (size_t rows = 0; rows < size[1]; ++rows) {
amd::Os::fastMemcpy((reinterpret_cast<address>(dst) + dstOffs),
(reinterpret_cast<const_address>(src) + srcOffs), copySize);
2014-07-04 16:17:05 -04:00
srcOffs += srcRowPitch;
dstOffs += copySize;
2014-07-04 16:17:05 -04:00
}
}
2014-07-04 16:17:05 -04:00
// Unmap source and destination memory
srcMemory.cpuUnmap(vDev_);
dstMemory.cpuUnmap(vDev_);
2014-07-04 16:17:05 -04:00
return true;
2014-07-04 16:17:05 -04:00
}
bool HostBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Memory& dstMemory,
const amd::Coord3D& srcOrigin,
const amd::Coord3D& dstOrigin, const amd::Coord3D& size,
bool entire, size_t rowPitch, size_t slicePitch) const {
// Map source memory
void* src = srcMemory.cpuMap(vDev_, Memory::CpuReadOnly);
if (src == NULL) {
LogError("Couldn't map source memory");
return false;
}
size_t startLayer = dstOrigin[2];
size_t numLayers = size[2];
if (dstMemory.owner()->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
startLayer = dstOrigin[1];
numLayers = size[1];
}
// rowPitch and slicePitch in bytes
size_t dstRowPitch;
size_t dstSlicePitch;
// Map destination memory
void* dst = dstMemory.cpuMap(vDev_, (entire) ? Memory::CpuWriteOnly : 0, startLayer, numLayers,
&dstRowPitch, &dstSlicePitch);
if (dst == NULL) {
LogError("Couldn't map destination memory");
return false;
}
size_t elementSize = dstMemory.owner()->asImage()->getImageFormat().getElementSize();
size_t srcOffs = srcOrigin[0];
size_t dstOffs = dstOrigin[0];
size_t dstOffsOrg;
size_t copySize = size[0];
// Calculate the offset in bytes
dstOffs *= elementSize;
copySize *= elementSize;
// Adjust destination offset with Y and Z dimension
dstOffs += dstRowPitch * dstOrigin[1];
dstOffs += dstSlicePitch * dstOrigin[2];
dstOffsOrg = dstOffs;
// Copy memory slice by slice
for (size_t slice = 0; slice < size[2]; ++slice) {
dstOffs = dstOffsOrg + slice * dstSlicePitch;
2014-07-04 16:17:05 -04:00
// Copy memory line by line
for (size_t rows = 0; rows < size[1]; ++rows) {
amd::Os::fastMemcpy((reinterpret_cast<address>(dst) + dstOffs),
(reinterpret_cast<const_address>(src) + srcOffs), copySize);
2014-07-04 16:17:05 -04:00
srcOffs += copySize;
dstOffs += dstRowPitch;
2014-07-04 16:17:05 -04:00
}
}
2014-07-04 16:17:05 -04:00
// Unmap source and destination memory
srcMemory.cpuUnmap(vDev_);
dstMemory.cpuUnmap(vDev_);
2014-07-04 16:17:05 -04:00
return true;
2014-07-04 16:17:05 -04:00
}
bool HostBlitManager::copyImage(device::Memory& srcMemory, device::Memory& dstMemory,
const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin,
const amd::Coord3D& size, bool entire) const {
size_t startLayer = srcOrigin[2];
size_t numLayers = size[2];
if (srcMemory.owner()->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
startLayer = srcOrigin[1];
numLayers = size[1];
}
// rowPitch and slicePitch in bytes
size_t srcRowPitch;
size_t srcSlicePitch;
// Map source memory
void* src = srcMemory.cpuMap(vDev_, Memory::CpuReadOnly, startLayer, numLayers, &srcRowPitch,
&srcSlicePitch);
if (src == NULL) {
LogError("Couldn't map source memory");
return false;
}
if (dstMemory.owner()->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
startLayer = dstOrigin[1];
numLayers = size[1];
} else {
startLayer = dstOrigin[2];
numLayers = size[2];
}
// rowPitch and slicePitch in bytes
size_t dstRowPitch;
size_t dstSlicePitch;
// Map destination memory
void* dst = dstMemory.cpuMap(vDev_, (entire) ? Memory::CpuWriteOnly : 0, startLayer, numLayers,
&dstRowPitch, &dstSlicePitch);
if (dst == NULL) {
LogError("Couldn't map destination memory");
return false;
}
size_t elementSize = dstMemory.owner()->asImage()->getImageFormat().getElementSize();
assert(elementSize == srcMemory.owner()->asImage()->getImageFormat().getElementSize());
size_t srcOffs = srcOrigin[0];
size_t dstOffs = dstOrigin[0];
size_t srcOffsOrg;
size_t dstOffsOrg;
size_t copySize = size[0];
// Calculate the offsets in bytes
srcOffs *= elementSize;
dstOffs *= elementSize;
copySize *= elementSize;
// Adjust destination and sorce offsets with Y dimension
srcOffs += srcRowPitch * srcOrigin[1];
dstOffs += dstRowPitch * dstOrigin[1];
// Adjust destination and sorce offsets with Z dimension
srcOffs += srcSlicePitch * srcOrigin[2];
dstOffs += dstSlicePitch * dstOrigin[2];
srcOffsOrg = srcOffs;
dstOffsOrg = dstOffs;
// Copy memory slice by slice
for (size_t slice = 0; slice < size[2]; ++slice) {
srcOffs = srcOffsOrg + slice * srcSlicePitch;
dstOffs = dstOffsOrg + slice * dstSlicePitch;
2014-07-04 16:17:05 -04:00
// Copy memory line by line
for (size_t rows = 0; rows < size[1]; ++rows) {
amd::Os::fastMemcpy((reinterpret_cast<address>(dst) + dstOffs),
(reinterpret_cast<const_address>(src) + srcOffs), copySize);
2014-07-04 16:17:05 -04:00
srcOffs += srcRowPitch;
dstOffs += dstRowPitch;
2014-07-04 16:17:05 -04:00
}
}
2014-07-04 16:17:05 -04:00
// Unmap source and destination memory
srcMemory.cpuUnmap(vDev_);
dstMemory.cpuUnmap(vDev_);
2014-07-04 16:17:05 -04:00
return true;
2014-07-04 16:17:05 -04:00
}
bool HostBlitManager::fillBuffer(device::Memory& memory, const void* pattern, size_t patternSize,
const amd::Coord3D& surface, const amd::Coord3D& origin,
const amd::Coord3D& size, bool entire, bool forceBlit) const {
// Map memory
void* fillMem = memory.cpuMap(vDev_, (entire) ? Memory::CpuWriteOnly : 0);
if (fillMem == NULL) {
LogError("Couldn't map destination memory");
return false;
}
size_t offset = origin[0];
size_t fillSize = size[0];
if ((fillSize % patternSize) != 0) {
LogError("Misaligned buffer size and pattern size!");
}
// Fill the buffer memory with a pattern
for (size_t i = 0; i < (fillSize / patternSize); i++) {
memcpy((reinterpret_cast<address>(fillMem) + offset),
(reinterpret_cast<const_address>(pattern)), patternSize);
offset += patternSize;
}
// Unmap source and destination memory
memory.cpuUnmap(vDev_);
return true;
2014-07-04 16:17:05 -04:00
}
bool HostBlitManager::fillImage(device::Memory& memory, const void* pattern,
const amd::Coord3D& origin, const amd::Coord3D& size,
bool entire) const {
size_t startLayer = origin[2];
size_t numLayers = size[2];
if (memory.owner()->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
startLayer = origin[1];
numLayers = size[1];
}
// rowPitch and slicePitch in bytes
size_t devRowPitch;
size_t devSlicePitch;
void* newpattern = const_cast<void*>(pattern);
float fFillColor[4];
// Converting a linear RGB floating-point color value to a normalized 8-bit unsigned integer sRGB
// value so that the cpu path can treat sRGB as RGB for host transfer.
if (memory.owner()->asImage()->getImageFormat().image_channel_order == CL_sRGBA) {
float* fColor = static_cast<float*>(newpattern);
fFillColor[0] = sRGBmap(fColor[0]) / 255.0f;
fFillColor[1] = sRGBmap(fColor[1]) / 255.0f;
fFillColor[2] = sRGBmap(fColor[2]) / 255.0f;
fFillColor[3] = fColor[3];
newpattern = static_cast<void*>(&fFillColor[0]);
}
// Map memory
void* fillMem = memory.cpuMap(vDev_, (entire) ? Memory::CpuWriteOnly : 0, startLayer, numLayers,
&devRowPitch, &devSlicePitch);
if (fillMem == NULL) {
LogError("Couldn't map destination memory");
return false;
}
float fillValue[4];
memset(fillValue, 0, sizeof(fillValue));
memory.owner()->asImage()->getImageFormat().formatColor(newpattern, fillValue);
size_t elementSize = memory.owner()->asImage()->getImageFormat().getElementSize();
size_t offset = origin[0] * elementSize;
size_t offsetOrg;
// Adjust offset with Y dimension
offset += devRowPitch * origin[1];
// Adjust offset with Z dimension
offset += devSlicePitch * origin[2];
offsetOrg = offset;
// Fill the image memory with a pattern
for (size_t slice = 0; slice < size[2]; ++slice) {
offset = offsetOrg + slice * devSlicePitch;
for (size_t rows = 0; rows < size[1]; ++rows) {
size_t pixOffset = offset;
// Copy memory pixel by pixel
for (size_t column = 0; column < size[0]; ++column) {
memcpy((reinterpret_cast<address>(fillMem) + pixOffset),
(reinterpret_cast<const_address>(fillValue)), elementSize);
pixOffset += elementSize;
}
offset += devRowPitch;
}
}
// Unmap memory
memory.cpuUnmap(vDev_);
return true;
2014-07-04 16:17:05 -04:00
}
uint32_t HostBlitManager::sRGBmap(float fc) const {
double c = (double)fc;
#ifdef ATI_OS_LINUX
if (std::isnan(c)) c = 0.0;
#else
if (_isnan(c)) c = 0.0;
#endif
if (c > 1.0)
c = 1.0;
else if (c < 0.0)
c = 0.0;
else if (c < 0.0031308)
c = 12.92 * c;
else
c = (1055.0 / 1000.0) * pow(c, 5.0 / 12.0) - (55.0 / 1000.0);
return (uint32_t)(c * 255.0 + 0.5);
}
bool HostBlitManager::FillBufferInfo::ExpandPattern64(uint64_t pattern, size_t pattern_size,
uint64_t& pattern64) {
bool retval = true;
do {
// If the pattern is 0 or if the pattern_size is same as max size.
if (pattern == 0 || pattern_size == sizeof(uint64_t)) {
pattern64 = pattern;
break;
}
// Clean Curr_pattern, since it was casted off from const void* with a lesser size than size_t.
ClearBits64(pattern, (pattern_size * 8));
pattern64 = 0;
if (pattern_size == sizeof(uint8_t)) {
pattern = pattern & 0xff;
pattern64 = ((pattern << 56) | (pattern << 48) | (pattern << 40) | (pattern << 32)
| (pattern << 24) | (pattern << 16) | (pattern << 8) | (pattern));
} else if (pattern_size == sizeof(uint16_t)) {
pattern = pattern & 0xffff;
pattern64 = ((pattern << 48) | (pattern << 32) | (pattern << 16) | (pattern));
} else if (pattern_size == sizeof(uint32_t)) {
pattern = pattern & 0xffffffff;
pattern64 = ((pattern << 32) | (pattern));
} else {
LogPrintfError("Unsupported Pattern size: %u \n", pattern_size);
retval = false;
break;
}
} while (0);
return retval;
}
bool HostBlitManager::FillBufferInfo::PackInfo(const device::Memory& memory, size_t fill_size,
size_t fill_origin, const void* pattern_ptr,
size_t pattern_size,
std::vector<FillBufferInfo>& packed_info) {
// 1. Validate input arguments
guarantee(fill_size >= pattern_size, "Pattern Size cannot be greater than fill size");
guarantee(fill_size <= memory.size(), "Cannot fill more than the mem object size");
// 2. Calculate the next closest dword aligned address for faster processing
size_t dst_addr = memory.virtualAddress() + fill_origin;
size_t aligned_dst_addr = amd::alignUp(dst_addr, sizeof(size_t));
guarantee(aligned_dst_addr >= dst_addr, "Aligned address cannot be greater than destination"
"address");
// 3. If given address is not aligned calculate head and tail size.
2022-07-07 19:57:19 +05:30
size_t head_size = std::min(aligned_dst_addr - dst_addr, fill_size);
size_t aligned_size = ((fill_size - head_size) / sizeof(size_t)) * sizeof(size_t);
size_t tail_size = (fill_size - head_size) % sizeof(size_t);
2022-07-07 19:57:19 +05:30
guarantee((head_size + aligned_size + tail_size) <= fill_size, "Head size, aligned size & tail"
"size together cannot cross fill size");
// 4. Clear unwanted bytes from the pattern if the pattern size is < sizeof(size_t).
uint64_t pattern = *(reinterpret_cast<uint64_t*>(const_cast<void*>(pattern_ptr)));
if (pattern_size < sizeof(uint64_t)) {
ClearBits64(pattern, (pattern_size * 8));
}
// 5. Fill the head, aligned, tail info if they exist.
FillBufferInfo fill_info;
if (head_size > 0) {
// Offsetted ptrs should align with pattern size. Runtime not responsible for rotating pattern.
guarantee((head_size % pattern_size) == 0, "Offseted ptr should align with pattern_size");
fill_info.fill_size_ = head_size;
packed_info.push_back(fill_info);
}
fill_info.clearInfo();
if (aligned_size > 0) {
// Offsetted ptrs should align with pattern size. Runtime not responsible for rotating pattern.
guarantee((aligned_size % pattern_size) == 0, "Offseted ptr should align with pattern_size");
if (pattern_size < sizeof(uint64_t)) {
if (!ExpandPattern64(pattern, pattern_size, fill_info.expanded_pattern_)) {
DevLogPrintfError("Failed Expanding the pattern for pattern:%u, pattern_size: %u",
pattern, pattern_size);
return false;
}
fill_info.pattern_expanded_ = true;
}
fill_info.fill_size_ = aligned_size;
packed_info.push_back(fill_info);
}
fill_info.clearInfo();
if (tail_size > 0) {
// Offsetted ptrs should align with pattern size. Runtime not responsible for rotating pattern.
guarantee((tail_size % pattern_size) == 0, "Offseted ptr should align with pattern_size");
fill_info.fill_size_ = tail_size;
packed_info.push_back(fill_info);
}
fill_info.clearInfo();
return true;
}
} // namespace gpu