// // Copyright (c) 2010 Advanced Micro Devices, Inc. All rights reserved. // #include "platform/commandqueue.hpp" #include "device/device.hpp" #include "device/blit.hpp" #include "utils/debug.hpp" #include namespace device { HostBlitManager::HostBlitManager(VirtualDevice& vDev, Setup setup) : BlitManager(setup), vDev_(vDev), dev_(vDev.device()) {} bool HostBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost, const amd::Coord3D& origin, const amd::Coord3D& size, bool entire) const { // Map the device memory to CPU visible void* src = srcMemory.cpuMap(vDev_, Memory::CpuReadOnly); if (NULL == src) { LogError("Couldn't map device memory for host read"); return false; } // Copy memory amd::Os::fastMemcpy(dstHost, reinterpret_cast(src) + origin[0], size[0]); // Unmap device memory srcMemory.cpuUnmap(vDev_); return true; } bool HostBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost, const amd::BufferRect& bufRect, const amd::BufferRect& hostRect, const amd::Coord3D& size, bool entire) const { // Map source memory void* src = srcMemory.cpuMap(vDev_, Memory::CpuReadOnly); if (src == NULL) { LogError("Couldn't map source memory"); return false; } size_t srcOffset; size_t dstOffset; for (size_t z = 0; z < size[2]; ++z) { for (size_t y = 0; y < size[1]; ++y) { srcOffset = bufRect.offset(0, y, z); dstOffset = hostRect.offset(0, y, z); // Copy memory line by line amd::Os::fastMemcpy((reinterpret_cast

(dstHost) + dstOffset), (reinterpret_cast(src) + srcOffset), size[0]); } } // Unmap source memory srcMemory.cpuUnmap(vDev_); return true; } bool HostBlitManager::readImage(device::Memory& srcMemory, void* dstHost, const amd::Coord3D& origin, const amd::Coord3D& size, size_t rowPitch, size_t slicePitch, bool entire) const { size_t startLayer = origin[2]; size_t numLayers = size[2]; if (srcMemory.owner()->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) { startLayer = origin[1]; numLayers = size[1]; } // rowPitch and slicePitch in bytes size_t srcRowPitch; size_t srcSlicePitch; // Get physical GPU memmory void* src = srcMemory.cpuMap(vDev_, Memory::CpuReadOnly, startLayer, numLayers, &srcRowPitch, &srcSlicePitch); if (NULL == src) { LogError("Couldn't map GPU memory for host read"); return false; } size_t elementSize = srcMemory.owner()->asImage()->getImageFormat().getElementSize(); size_t srcOffsBase = origin[0] * elementSize; size_t copySize = size[0] * elementSize; size_t srcOffs; size_t dstOffs = 0; // Make sure we use the right pitch if it's not specified if (rowPitch == 0) { rowPitch = size[0] * elementSize; } // Make sure we use the right slice if it's not specified if (slicePitch == 0) { slicePitch = size[0] * size[1] * elementSize; } // Adjust destination offset with Y dimension srcOffsBase += srcRowPitch * origin[1]; // Adjust the destination offset with Z dimension srcOffsBase += srcSlicePitch * origin[2]; // Copy memory line by line for (size_t slice = 0; slice < size[2]; ++slice) { srcOffs = srcOffsBase + slice * srcSlicePitch; dstOffs = slice * slicePitch; // Copy memory line by line for (size_t row = 0; row < size[1]; ++row) { // Copy memory amd::Os::fastMemcpy((reinterpret_cast

(dstHost) + dstOffs), (reinterpret_cast(src) + srcOffs), copySize); srcOffs += srcRowPitch; dstOffs += rowPitch; } } // Unmap the device memory srcMemory.cpuUnmap(vDev_); return true; } bool HostBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemory, const amd::Coord3D& origin, const amd::Coord3D& size, bool entire) const { uint flags = 0; if (entire) { flags = Memory::CpuWriteOnly; } // Map the device memory to CPU visible void* dst = dstMemory.cpuMap(vDev_, flags); if (NULL == dst) { LogError("Couldn't map GPU memory for host write"); return false; } // Copy memory amd::Os::fastMemcpy(reinterpret_cast

(dst) + origin[0], srcHost, size[0]); // Unmap the device memory dstMemory.cpuUnmap(vDev_); return true; } bool HostBlitManager::writeBufferRect(const void* srcHost, device::Memory& dstMemory, const amd::BufferRect& hostRect, const amd::BufferRect& bufRect, const amd::Coord3D& size, bool entire) const { // Map destination memory void* dst = dstMemory.cpuMap(vDev_, (entire) ? Memory::CpuWriteOnly : 0); if (dst == NULL) { LogError("Couldn't map destination memory"); return false; } size_t srcOffset; size_t dstOffset; for (size_t z = 0; z < size[2]; ++z) { for (size_t y = 0; y < size[1]; ++y) { srcOffset = hostRect.offset(0, y, z); dstOffset = bufRect.offset(0, y, z); // Copy memory line by line amd::Os::fastMemcpy((reinterpret_cast

(dst) + dstOffset), (reinterpret_cast(srcHost) + srcOffset), size[0]); } } // Unmap destination memory dstMemory.cpuUnmap(vDev_); return true; } bool HostBlitManager::writeImage(const void* srcHost, device::Memory& dstMemory, const amd::Coord3D& origin, const amd::Coord3D& size, size_t rowPitch, size_t slicePitch, bool entire) const { uint flags = 0; if (entire) { flags = Memory::CpuWriteOnly; } size_t startLayer = origin[2]; size_t numLayers = size[2]; if (dstMemory.owner()->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) { startLayer = origin[1]; numLayers = size[1]; } // rowPitch and slicePitch in bytes size_t dstRowPitch; size_t dstSlicePitch; // Map the device memory to CPU visible void* dst = dstMemory.cpuMap(vDev_, flags, startLayer, numLayers, &dstRowPitch, &dstSlicePitch); if (NULL == dst) { LogError("Couldn't map GPU memory for host write"); return false; } size_t elementSize = dstMemory.owner()->asImage()->getImageFormat().getElementSize(); size_t srcOffs = 0; size_t copySize = size[0] * elementSize; size_t dstOffsBase = origin[0] * elementSize; size_t dstOffs; // Make sure we use the right pitch if it's not specified if (rowPitch == 0) { rowPitch = size[0] * elementSize; } // Make sure we use the right slice if it's not specified if (slicePitch == 0) { slicePitch = size[0] * size[1] * elementSize; } // Adjust the destination offset with Y dimension dstOffsBase += dstRowPitch * origin[1]; // Adjust the destination offset with Z dimension dstOffsBase += dstSlicePitch * origin[2]; // Copy memory slice by slice for (size_t slice = 0; slice < size[2]; ++slice) { dstOffs = dstOffsBase + slice * dstSlicePitch; srcOffs = slice * slicePitch; // Copy memory line by line for (size_t row = 0; row < size[1]; ++row) { // Copy memory amd::Os::fastMemcpy((reinterpret_cast

(dst) + dstOffs), (reinterpret_cast(srcHost) + srcOffs), copySize); dstOffs += dstRowPitch; srcOffs += rowPitch; } } // Unmap the device memory dstMemory.cpuUnmap(vDev_); return true; } bool HostBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& dstMemory, const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin, const amd::Coord3D& size, bool entire) const { // Map source memory void* src = srcMemory.cpuMap(vDev_, // Overlap detection (&srcMemory == &dstMemory) ? 0 : Memory::CpuReadOnly); if (src == NULL) { LogError("Couldn't map source memory"); return false; } // Map destination memory void* dst = dstMemory.cpuMap(vDev_, (entire) ? Memory::CpuWriteOnly : 0); if (dst == NULL) { LogError("Couldn't map destination memory"); return false; } // Straight forward buffer copy amd::Os::fastMemcpy((reinterpret_cast

(dst) + dstOrigin[0]), (reinterpret_cast(src) + srcOrigin[0]), size[0]); // Unmap source and destination memory dstMemory.cpuUnmap(vDev_); srcMemory.cpuUnmap(vDev_); return true; } bool HostBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory& dstMemory, const amd::BufferRect& srcRect, const amd::BufferRect& dstRect, const amd::Coord3D& size, bool entire) const { // Map source memory void* src = srcMemory.cpuMap(vDev_, // Overlap detection (&srcMemory == &dstMemory) ? 0 : Memory::CpuReadOnly); if (src == NULL) { LogError("Couldn't map source memory"); return false; } // Map destination memory void* dst = dstMemory.cpuMap(vDev_, (entire) ? Memory::CpuWriteOnly : 0); if (dst == NULL) { LogError("Couldn't map destination memory"); return false; } for (size_t z = 0; z < size[2]; ++z) { for (size_t y = 0; y < size[1]; ++y) { size_t srcOffset = srcRect.offset(0, y, z); size_t dstOffset = dstRect.offset(0, y, z); // Copy memory line by line amd::Os::fastMemcpy((reinterpret_cast

(dst) + dstOffset), (reinterpret_cast(src) + srcOffset), size[0]); } } // Unmap source and destination memory dstMemory.cpuUnmap(vDev_); srcMemory.cpuUnmap(vDev_); return true; } bool HostBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Memory& dstMemory, const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin, const amd::Coord3D& size, bool entire, size_t rowPitch, size_t slicePitch) const { size_t startLayer = srcOrigin[2]; size_t numLayers = size[2]; if (srcMemory.owner()->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) { startLayer = srcOrigin[1]; numLayers = size[1]; } // rowPitch and slicePitch in bytes size_t srcRowPitch; size_t srcSlicePitch; // Map source memory void* src = srcMemory.cpuMap(vDev_, Memory::CpuReadOnly, startLayer, numLayers, &srcRowPitch, &srcSlicePitch); if (src == NULL) { LogError("Couldn't map source memory"); return false; } size_t elementSize = srcMemory.owner()->asImage()->getImageFormat().getElementSize(); // Map destination memory void* dst = dstMemory.cpuMap(vDev_, (entire) ? Memory::CpuWriteOnly : 0); if (dst == NULL) { LogError("Couldn't map destination memory"); return false; } size_t srcOffs = srcOrigin[0]; size_t dstOffs = dstOrigin[0]; size_t srcOffsOrg; size_t copySize = size[0]; // Calculate the offset in bytes srcOffs *= elementSize; copySize *= elementSize; // Adjust source offset with Y and Z dimensions srcOffs += srcRowPitch * srcOrigin[1]; srcOffs += srcSlicePitch * srcOrigin[2]; srcOffsOrg = srcOffs; // Copy memory slice by slice for (size_t slice = 0; slice < size[2]; ++slice) { srcOffs = srcOffsOrg + slice * srcSlicePitch; // Copy memory line by line for (size_t rows = 0; rows < size[1]; ++rows) { amd::Os::fastMemcpy((reinterpret_cast

(dst) + dstOffs), (reinterpret_cast(src) + srcOffs), copySize); srcOffs += srcRowPitch; dstOffs += copySize; } } // Unmap source and destination memory srcMemory.cpuUnmap(vDev_); dstMemory.cpuUnmap(vDev_); return true; } bool HostBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Memory& dstMemory, const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin, const amd::Coord3D& size, bool entire, size_t rowPitch, size_t slicePitch) const { // Map source memory void* src = srcMemory.cpuMap(vDev_, Memory::CpuReadOnly); if (src == NULL) { LogError("Couldn't map source memory"); return false; } size_t startLayer = dstOrigin[2]; size_t numLayers = size[2]; if (dstMemory.owner()->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) { startLayer = dstOrigin[1]; numLayers = size[1]; } // rowPitch and slicePitch in bytes size_t dstRowPitch; size_t dstSlicePitch; // Map destination memory void* dst = dstMemory.cpuMap(vDev_, (entire) ? Memory::CpuWriteOnly : 0, startLayer, numLayers, &dstRowPitch, &dstSlicePitch); if (dst == NULL) { LogError("Couldn't map destination memory"); return false; } size_t elementSize = dstMemory.owner()->asImage()->getImageFormat().getElementSize(); size_t srcOffs = srcOrigin[0]; size_t dstOffs = dstOrigin[0]; size_t dstOffsOrg; size_t copySize = size[0]; // Calculate the offset in bytes dstOffs *= elementSize; copySize *= elementSize; // Adjust destination offset with Y and Z dimension dstOffs += dstRowPitch * dstOrigin[1]; dstOffs += dstSlicePitch * dstOrigin[2]; dstOffsOrg = dstOffs; // Copy memory slice by slice for (size_t slice = 0; slice < size[2]; ++slice) { dstOffs = dstOffsOrg + slice * dstSlicePitch; // Copy memory line by line for (size_t rows = 0; rows < size[1]; ++rows) { amd::Os::fastMemcpy((reinterpret_cast

(dst) + dstOffs), (reinterpret_cast(src) + srcOffs), copySize); srcOffs += copySize; dstOffs += dstRowPitch; } } // Unmap source and destination memory srcMemory.cpuUnmap(vDev_); dstMemory.cpuUnmap(vDev_); return true; } bool HostBlitManager::copyImage(device::Memory& srcMemory, device::Memory& dstMemory, const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin, const amd::Coord3D& size, bool entire) const { size_t startLayer = srcOrigin[2]; size_t numLayers = size[2]; if (srcMemory.owner()->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) { startLayer = srcOrigin[1]; numLayers = size[1]; } // rowPitch and slicePitch in bytes size_t srcRowPitch; size_t srcSlicePitch; // Map source memory void* src = srcMemory.cpuMap(vDev_, Memory::CpuReadOnly, startLayer, numLayers, &srcRowPitch, &srcSlicePitch); if (src == NULL) { LogError("Couldn't map source memory"); return false; } if (dstMemory.owner()->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) { startLayer = dstOrigin[1]; numLayers = size[1]; } else { startLayer = dstOrigin[2]; numLayers = size[2]; } // rowPitch and slicePitch in bytes size_t dstRowPitch; size_t dstSlicePitch; // Map destination memory void* dst = dstMemory.cpuMap(vDev_, (entire) ? Memory::CpuWriteOnly : 0, startLayer, numLayers, &dstRowPitch, &dstSlicePitch); if (dst == NULL) { LogError("Couldn't map destination memory"); return false; } size_t elementSize = dstMemory.owner()->asImage()->getImageFormat().getElementSize(); assert(elementSize == srcMemory.owner()->asImage()->getImageFormat().getElementSize()); size_t srcOffs = srcOrigin[0]; size_t dstOffs = dstOrigin[0]; size_t srcOffsOrg; size_t dstOffsOrg; size_t copySize = size[0]; // Calculate the offsets in bytes srcOffs *= elementSize; dstOffs *= elementSize; copySize *= elementSize; // Adjust destination and sorce offsets with Y dimension srcOffs += srcRowPitch * srcOrigin[1]; dstOffs += dstRowPitch * dstOrigin[1]; // Adjust destination and sorce offsets with Z dimension srcOffs += srcSlicePitch * srcOrigin[2]; dstOffs += dstSlicePitch * dstOrigin[2]; srcOffsOrg = srcOffs; dstOffsOrg = dstOffs; // Copy memory slice by slice for (size_t slice = 0; slice < size[2]; ++slice) { srcOffs = srcOffsOrg + slice * srcSlicePitch; dstOffs = dstOffsOrg + slice * dstSlicePitch; // Copy memory line by line for (size_t rows = 0; rows < size[1]; ++rows) { amd::Os::fastMemcpy((reinterpret_cast

(dst) + dstOffs), (reinterpret_cast(src) + srcOffs), copySize); srcOffs += srcRowPitch; dstOffs += dstRowPitch; } } // Unmap source and destination memory srcMemory.cpuUnmap(vDev_); dstMemory.cpuUnmap(vDev_); return true; } bool HostBlitManager::fillBuffer(device::Memory& memory, const void* pattern, size_t patternSize, const amd::Coord3D& origin, const amd::Coord3D& size, bool entire) const { // Map memory void* fillMem = memory.cpuMap(vDev_, (entire) ? Memory::CpuWriteOnly : 0); if (fillMem == NULL) { LogError("Couldn't map destination memory"); return false; } size_t offset = origin[0]; size_t fillSize = size[0]; if ((fillSize % patternSize) != 0) { LogError("Misaligned buffer size and pattern size!"); } // Fill the buffer memory with a pattern for (size_t i = 0; i < (fillSize / patternSize); i++) { memcpy((reinterpret_cast

(fillMem) + offset), (reinterpret_cast(pattern)), patternSize); offset += patternSize; } // Unmap source and destination memory memory.cpuUnmap(vDev_); return true; } bool HostBlitManager::fillImage(device::Memory& memory, const void* pattern, const amd::Coord3D& origin, const amd::Coord3D& size, bool entire) const { size_t startLayer = origin[2]; size_t numLayers = size[2]; if (memory.owner()->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) { startLayer = origin[1]; numLayers = size[1]; } // rowPitch and slicePitch in bytes size_t devRowPitch; size_t devSlicePitch; void* newpattern = const_cast(pattern); cl_float4 fFillColor; // Converting a linear RGB floating-point color value to a normalized 8-bit unsigned integer sRGB // value so that the cpu path can treat sRGB as RGB for host transfer. if (memory.owner()->asImage()->getImageFormat().image_channel_order == CL_sRGBA) { float* fColor = static_cast(newpattern); fFillColor.s[0] = sRGBmap(fColor[0]) / 255.0f; fFillColor.s[1] = sRGBmap(fColor[1]) / 255.0f; fFillColor.s[2] = sRGBmap(fColor[2]) / 255.0f; fFillColor.s[3] = fColor[3]; newpattern = static_cast(&fFillColor); } // Map memory void* fillMem = memory.cpuMap(vDev_, (entire) ? Memory::CpuWriteOnly : 0, startLayer, numLayers, &devRowPitch, &devSlicePitch); if (fillMem == NULL) { LogError("Couldn't map destination memory"); return false; } float fillValue[4]; memset(fillValue, 0, sizeof(fillValue)); memory.owner()->asImage()->getImageFormat().formatColor(newpattern, fillValue); size_t elementSize = memory.owner()->asImage()->getImageFormat().getElementSize(); size_t offset = origin[0] * elementSize; size_t offsetOrg; // Adjust offset with Y dimension offset += devRowPitch * origin[1]; // Adjust offset with Z dimension offset += devSlicePitch * origin[2]; offsetOrg = offset; // Fill the image memory with a pattern for (size_t slice = 0; slice < size[2]; ++slice) { offset = offsetOrg + slice * devSlicePitch; for (size_t rows = 0; rows < size[1]; ++rows) { size_t pixOffset = offset; // Copy memory pixel by pixel for (size_t column = 0; column < size[0]; ++column) { memcpy((reinterpret_cast

(fillMem) + pixOffset), (reinterpret_cast(fillValue)), elementSize); pixOffset += elementSize; } offset += devRowPitch; } } // Unmap memory memory.cpuUnmap(vDev_); return true; } cl_uint HostBlitManager::sRGBmap(float fc) const { double c = (double)fc; #ifdef ATI_OS_LINUX if (std::isnan(c)) c = 0.0; #else if (_isnan(c)) c = 0.0; #endif if (c > 1.0) c = 1.0; else if (c < 0.0) c = 0.0; else if (c < 0.0031308) c = 12.92 * c; else c = (1055.0 / 1000.0) * pow(c, 5.0 / 12.0) - (55.0 / 1000.0); return (cl_uint)(c * 255.0 + 0.5); } } // namespace gpu