P4 to Git Change 1077444 by gandryey@gera-dev-w7 on 2014/09/16 14:31:35
ECR #304775 - Add capability to enable large allocations >4GB - Update the blit kernels to consider a buffer size >4GB Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/blitcl.cpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpublit.cpp#110 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusettings.cpp#280 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsablit.cpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#214 edit
Этот коммит содержится в:
@@ -11,13 +11,13 @@ const char* BlitSourceCode = BLIT_KERNELS(
|
||||
__kernel void copyBufferToImage(
|
||||
__global uint* src,
|
||||
__write_only image2d_array_t dst,
|
||||
int4 srcOrigin,
|
||||
ulong4 srcOrigin,
|
||||
int4 dstOrigin,
|
||||
int4 size,
|
||||
int4 format,
|
||||
int4 pitch)
|
||||
uint4 format,
|
||||
ulong4 pitch)
|
||||
{
|
||||
uint idxSrc;
|
||||
ulong idxSrc;
|
||||
int4 coordsDst;
|
||||
uint4 pixel;
|
||||
__global uint* srcUInt = src;
|
||||
@@ -49,61 +49,55 @@ __kernel void copyBufferToImage(
|
||||
switch (format.x) {
|
||||
case 1:
|
||||
// Check size
|
||||
switch (format.y) {
|
||||
case 1:
|
||||
if (format.y == 1) {
|
||||
pixel.x = (uint)srcUChar[idxSrc];
|
||||
break;
|
||||
case 2:
|
||||
}
|
||||
else if (format.y == 2) {
|
||||
pixel.x = (uint)srcUShort[idxSrc];
|
||||
break;
|
||||
case 4:
|
||||
}
|
||||
else {
|
||||
pixel.x = srcUInt[idxSrc];
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
// Check size
|
||||
switch (format.y) {
|
||||
case 1:
|
||||
if (format.y == 1) {
|
||||
tmpUShort = srcUShort[idxSrc];
|
||||
pixel.x = (uint)(tmpUShort & 0xff);
|
||||
pixel.y = (uint)(tmpUShort >> 8);
|
||||
break;
|
||||
case 2:
|
||||
}
|
||||
else if (format.y == 2) {
|
||||
tmpUInt = srcUInt[idxSrc];
|
||||
pixel.x = (tmpUInt & 0xffff);
|
||||
pixel.y = (tmpUInt >> 16);
|
||||
break;
|
||||
case 4:
|
||||
}
|
||||
else {
|
||||
pixel.x = srcUInt[idxSrc++];
|
||||
pixel.y = srcUInt[idxSrc];
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 4:
|
||||
// Check size
|
||||
switch (format.y) {
|
||||
case 1:
|
||||
if (format.y == 1) {
|
||||
tmpUInt = srcUInt[idxSrc];
|
||||
pixel.x = tmpUInt & 0xff;
|
||||
pixel.y = (tmpUInt >> 8) & 0xff;
|
||||
pixel.z = (tmpUInt >> 16) & 0xff;
|
||||
pixel.w = (tmpUInt >> 24) & 0xff;
|
||||
break;
|
||||
case 2:
|
||||
}
|
||||
else if (format.y == 2) {
|
||||
tmpUInt = srcUInt[idxSrc++];
|
||||
pixel.x = tmpUInt & 0xffff;
|
||||
pixel.y = (tmpUInt >> 16);
|
||||
tmpUInt = srcUInt[idxSrc];
|
||||
pixel.z = tmpUInt & 0xffff;
|
||||
pixel.w = (tmpUInt >> 16);
|
||||
break;
|
||||
case 4:
|
||||
}
|
||||
else {
|
||||
pixel.x = srcUInt[idxSrc++];
|
||||
pixel.y = srcUInt[idxSrc++];
|
||||
pixel.z = srcUInt[idxSrc++];
|
||||
pixel.w = srcUInt[idxSrc];
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -117,12 +111,12 @@ __kernel void copyImageToBuffer(
|
||||
__global ushort* dstUShort,
|
||||
__global uchar* dstUChar,
|
||||
int4 srcOrigin,
|
||||
int4 dstOrigin,
|
||||
ulong4 dstOrigin,
|
||||
int4 size,
|
||||
int4 format,
|
||||
int4 pitch)
|
||||
uint4 format,
|
||||
ulong4 pitch)
|
||||
{
|
||||
uint idxDst;
|
||||
ulong idxDst;
|
||||
int4 coordsSrc;
|
||||
uint4 texel;
|
||||
|
||||
@@ -205,12 +199,12 @@ __kernel void copyImageToBuffer(
|
||||
__kernel void copyImage(
|
||||
__read_only image2d_array_t src,
|
||||
__write_only image2d_array_t dst,
|
||||
int4 srcOrigin,
|
||||
int4 dstOrigin,
|
||||
int4 size)
|
||||
int4 srcOrigin,
|
||||
int4 dstOrigin,
|
||||
int4 size)
|
||||
{
|
||||
int4 coordsDst;
|
||||
int4 coordsSrc;
|
||||
int4 coordsDst;
|
||||
int4 coordsSrc;
|
||||
|
||||
coordsDst.x = get_global_id(0);
|
||||
coordsDst.y = get_global_id(1);
|
||||
@@ -234,12 +228,12 @@ __kernel void copyImage(
|
||||
__kernel void copyImage1DA(
|
||||
__read_only image2d_array_t src,
|
||||
__write_only image2d_array_t dst,
|
||||
int4 srcOrigin,
|
||||
int4 dstOrigin,
|
||||
int4 size)
|
||||
int4 srcOrigin,
|
||||
int4 dstOrigin,
|
||||
int4 size)
|
||||
{
|
||||
int4 coordsDst;
|
||||
int4 coordsSrc;
|
||||
int4 coordsDst;
|
||||
int4 coordsSrc;
|
||||
|
||||
coordsDst.x = get_global_id(0);
|
||||
coordsDst.y = get_global_id(1);
|
||||
@@ -271,13 +265,13 @@ __kernel void copyImage1DA(
|
||||
__kernel void copyBufferRect(
|
||||
__global uchar* src,
|
||||
__global uchar* dst,
|
||||
uint4 srcRect,
|
||||
uint4 dstRect,
|
||||
uint4 size)
|
||||
ulong4 srcRect,
|
||||
ulong4 dstRect,
|
||||
ulong4 size)
|
||||
{
|
||||
uint x = (uint)get_global_id(0);
|
||||
uint y = (uint)get_global_id(1);
|
||||
uint z = (uint)get_global_id(2);
|
||||
ulong x = get_global_id(0);
|
||||
ulong y = get_global_id(1);
|
||||
ulong z = get_global_id(2);
|
||||
|
||||
if ((x >= size.x) ||
|
||||
(y >= size.y) ||
|
||||
@@ -285,8 +279,8 @@ __kernel void copyBufferRect(
|
||||
return;
|
||||
}
|
||||
|
||||
uint offsSrc = srcRect.z + x + y * srcRect.x + z * srcRect.y;
|
||||
uint offsDst = dstRect.z + x + y * dstRect.x + z * dstRect.y;
|
||||
ulong offsSrc = srcRect.z + x + y * srcRect.x + z * srcRect.y;
|
||||
ulong offsDst = dstRect.z + x + y * dstRect.x + z * dstRect.y;
|
||||
|
||||
dst[offsDst] = src[offsSrc];
|
||||
}
|
||||
@@ -294,13 +288,13 @@ __kernel void copyBufferRect(
|
||||
__kernel void copyBufferRectAligned(
|
||||
__global uint* src,
|
||||
__global uint* dst,
|
||||
uint4 srcRect,
|
||||
uint4 dstRect,
|
||||
uint4 size)
|
||||
ulong4 srcRect,
|
||||
ulong4 dstRect,
|
||||
ulong4 size)
|
||||
{
|
||||
uint x = (uint)get_global_id(0);
|
||||
uint y = (uint)get_global_id(1);
|
||||
uint z = (uint)get_global_id(2);
|
||||
ulong x = get_global_id(0);
|
||||
ulong y = get_global_id(1);
|
||||
ulong z = get_global_id(2);
|
||||
|
||||
if ((x >= size.x) ||
|
||||
(y >= size.y) ||
|
||||
@@ -308,8 +302,8 @@ __kernel void copyBufferRectAligned(
|
||||
return;
|
||||
}
|
||||
|
||||
uint offsSrc = srcRect.z + x + y * srcRect.x + z * srcRect.y;
|
||||
uint offsDst = dstRect.z + x + y * dstRect.x + z * dstRect.y;
|
||||
ulong offsSrc = srcRect.z + x + y * srcRect.x + z * srcRect.y;
|
||||
ulong offsDst = dstRect.z + x + y * dstRect.x + z * dstRect.y;
|
||||
|
||||
if (size.w == 16) {
|
||||
__global uint4* src4 = (__global uint4*)src;
|
||||
@@ -324,18 +318,18 @@ __kernel void copyBufferRectAligned(
|
||||
__kernel void copyBuffer(
|
||||
__global uchar* src,
|
||||
__global uchar* dst,
|
||||
int srcOrigin,
|
||||
int dstOrigin,
|
||||
uint size)
|
||||
ulong srcOrigin,
|
||||
ulong dstOrigin,
|
||||
ulong size)
|
||||
{
|
||||
uint id = (uint)get_global_id(0);
|
||||
ulong id = get_global_id(0);
|
||||
|
||||
if (id >= size) {
|
||||
return;
|
||||
}
|
||||
|
||||
uint offsSrc = id + srcOrigin;
|
||||
uint offsDst = id + dstOrigin;
|
||||
ulong offsSrc = id + srcOrigin;
|
||||
ulong offsDst = id + dstOrigin;
|
||||
|
||||
dst[offsDst] = src[offsSrc];
|
||||
}
|
||||
@@ -343,19 +337,19 @@ __kernel void copyBuffer(
|
||||
__kernel void copyBufferAligned(
|
||||
__global uint* src,
|
||||
__global uint* dst,
|
||||
int srcOrigin,
|
||||
int dstOrigin,
|
||||
uint size,
|
||||
uint alignment)
|
||||
ulong srcOrigin,
|
||||
ulong dstOrigin,
|
||||
ulong size,
|
||||
uint alignment)
|
||||
{
|
||||
uint id = (uint)get_global_id(0);
|
||||
ulong id = get_global_id(0);
|
||||
|
||||
if (id >= size) {
|
||||
return;
|
||||
}
|
||||
|
||||
uint offsSrc = id + srcOrigin;
|
||||
uint offsDst = id + dstOrigin;
|
||||
ulong offsSrc = id + srcOrigin;
|
||||
ulong offsDst = id + dstOrigin;
|
||||
|
||||
if (alignment == 16) {
|
||||
__global uint4* src4 = (__global uint4*)src;
|
||||
@@ -371,11 +365,11 @@ __kernel void fillBuffer(
|
||||
__global uchar* bufUChar,
|
||||
__global uint* bufUInt,
|
||||
__constant uchar* pattern,
|
||||
uint patternSize,
|
||||
uint offset,
|
||||
uint size)
|
||||
uint patternSize,
|
||||
ulong offset,
|
||||
ulong size)
|
||||
{
|
||||
uint id = (uint)get_global_id(0);
|
||||
ulong id = get_global_id(0);
|
||||
|
||||
if (id >= size) {
|
||||
return;
|
||||
|
||||
@@ -1140,7 +1140,7 @@ KernelBlitManager::copyBufferToImage(
|
||||
|
||||
void
|
||||
CalcRowSlicePitches(
|
||||
cl_int* pitch, const cl_int* copySize,
|
||||
cl_ulong* pitch, const cl_int* copySize,
|
||||
size_t rowPitch, size_t slicePitch, const Memory& mem)
|
||||
{
|
||||
size_t memFmtSize = memoryFormatSize(mem.cal()->format_).size_;
|
||||
@@ -1317,17 +1317,17 @@ KernelBlitManager::copyBufferToImageKernel(
|
||||
granularity = 4;
|
||||
}
|
||||
CondLog(((srcOrigin[0] % granularity) != 0), "Unaligned offset in blit!");
|
||||
cl_int srcOrg[4] = { (cl_int)srcOrigin[0] / granularity,
|
||||
(cl_int)srcOrigin[1],
|
||||
(cl_int)srcOrigin[2], 0 };
|
||||
cl_ulong srcOrg[4] = { srcOrigin[0] / granularity,
|
||||
srcOrigin[1],
|
||||
srcOrigin[2], 0 };
|
||||
setArgument(kernels_[blitType], 2, sizeof(srcOrg), srcOrg);
|
||||
|
||||
cl_int dstOrg[4] = { (cl_int)dstOrigin[0],
|
||||
(cl_int)dstOrigin[1],
|
||||
(cl_int)dstOrigin[2], 0 };
|
||||
cl_int copySize[4] = { (cl_int)size[0],
|
||||
(cl_int)size[1],
|
||||
(cl_int)size[2], 0 };
|
||||
cl_int dstOrg[4] = { (cl_int)dstOrigin[0],
|
||||
(cl_int)dstOrigin[1],
|
||||
(cl_int)dstOrigin[2], 0 };
|
||||
cl_int copySize[4] = { (cl_int)size[0],
|
||||
(cl_int)size[1],
|
||||
(cl_int)size[2], 0 };
|
||||
if (swapLayer) {
|
||||
dstOrg[2] = dstOrg[1];
|
||||
dstOrg[1] = 0;
|
||||
@@ -1340,13 +1340,13 @@ KernelBlitManager::copyBufferToImageKernel(
|
||||
// Program memory format
|
||||
uint multiplier = memFmt.size_ / sizeof(uint32_t);
|
||||
multiplier = (multiplier == 0) ? 1 : multiplier;
|
||||
cl_int format[4] = { (cl_int)memFmt.components_,
|
||||
(cl_int)memFmt.size_ / (cl_int)memFmt.components_,
|
||||
(cl_int)multiplier, 0 };
|
||||
cl_uint format[4] = { memFmt.components_,
|
||||
memFmt.size_ / memFmt.components_,
|
||||
multiplier, 0 };
|
||||
setArgument(kernels_[blitType], 5, sizeof(format), format);
|
||||
|
||||
// Program row and slice pitches
|
||||
cl_int pitch[4] = { 0 };
|
||||
cl_ulong pitch[4] = { 0 };
|
||||
CalcRowSlicePitches(pitch, copySize, rowPitch, slicePitch, gpuMem(dstMemory));
|
||||
setArgument(kernels_[blitType], 6, sizeof(pitch), pitch);
|
||||
|
||||
@@ -1661,12 +1661,12 @@ KernelBlitManager::copyImageToBufferKernel(
|
||||
setArgument(kernels_[blitType], 2, sizeof(cl_mem), &mem);
|
||||
setArgument(kernels_[blitType], 3, sizeof(cl_mem), &mem);
|
||||
|
||||
cl_int srcOrg[4] = { (cl_int)srcOrigin[0],
|
||||
(cl_int)srcOrigin[1],
|
||||
(cl_int)srcOrigin[2], 0 };
|
||||
cl_int copySize[4] = { (cl_int)size[0],
|
||||
(cl_int)size[1],
|
||||
(cl_int)size[2], 0 };
|
||||
cl_int srcOrg[4] = { (cl_int)srcOrigin[0],
|
||||
(cl_int)srcOrigin[1],
|
||||
(cl_int)srcOrigin[2], 0 };
|
||||
cl_int copySize[4] = { (cl_int)size[0],
|
||||
(cl_int)size[1],
|
||||
(cl_int)size[2], 0 };
|
||||
if (swapLayer) {
|
||||
srcOrg[2] = srcOrg[1];
|
||||
srcOrg[1] = 0;
|
||||
@@ -1685,22 +1685,22 @@ KernelBlitManager::copyImageToBufferKernel(
|
||||
granularity = 4;
|
||||
}
|
||||
CondLog(((dstOrigin[0] % granularity) != 0), "Unaligned offset in blit!");
|
||||
cl_int dstOrg[4] = { (cl_int)dstOrigin[0] / granularity,
|
||||
(cl_int)dstOrigin[1],
|
||||
(cl_int)dstOrigin[2], 0 };
|
||||
cl_ulong dstOrg[4] = { dstOrigin[0] / granularity,
|
||||
dstOrigin[1],
|
||||
dstOrigin[2], 0 };
|
||||
setArgument(kernels_[blitType], 5, sizeof(dstOrg), dstOrg);
|
||||
setArgument(kernels_[blitType], 6, sizeof(copySize), copySize);
|
||||
|
||||
// Program memory format
|
||||
uint multiplier = memFmt.size_ / sizeof(uint32_t);
|
||||
multiplier = (multiplier == 0) ? 1 : multiplier;
|
||||
cl_int format[4] = { (cl_int)memFmt.components_,
|
||||
(cl_int)memFmt.size_ / (cl_int)memFmt.components_,
|
||||
(cl_int)multiplier, 0 };
|
||||
cl_uint format[4] = { memFmt.components_,
|
||||
memFmt.size_ / memFmt.components_,
|
||||
multiplier, 0 };
|
||||
setArgument(kernels_[blitType], 7, sizeof(format), format);
|
||||
|
||||
// Program row and slice pitches
|
||||
cl_int pitch[4] = { 0 };
|
||||
cl_ulong pitch[4] = { 0 };
|
||||
CalcRowSlicePitches(pitch, copySize, rowPitch, slicePitch, gpuMem(srcMemory));
|
||||
setArgument(kernels_[blitType], 8, sizeof(pitch), pitch);
|
||||
|
||||
@@ -2130,18 +2130,15 @@ KernelBlitManager::copyBufferRect(
|
||||
setArgument(kernels_[blitType], 0, sizeof(cl_mem), &mem);
|
||||
mem = &gpuMem(dstMemory);
|
||||
setArgument(kernels_[blitType], 1, sizeof(cl_mem), &mem);
|
||||
cl_uint src[4] = { (cl_uint)srcRect.rowPitch_,
|
||||
(cl_uint)srcRect.slicePitch_,
|
||||
(cl_uint)srcRect.start_, 0 };
|
||||
cl_ulong src[4] = { srcRect.rowPitch_,
|
||||
srcRect.slicePitch_,
|
||||
srcRect.start_, 0 };
|
||||
setArgument(kernels_[blitType], 2, sizeof(src), src);
|
||||
cl_uint dst[4] = { (cl_uint)dstRect.rowPitch_,
|
||||
(cl_uint)dstRect.slicePitch_,
|
||||
(cl_uint)dstRect.start_, 0 };
|
||||
cl_ulong dst[4] = { dstRect.rowPitch_,
|
||||
dstRect.slicePitch_,
|
||||
dstRect.start_, 0 };
|
||||
setArgument(kernels_[blitType], 3, sizeof(dst), dst);
|
||||
cl_int copySize[4] = { (cl_int)size[0],
|
||||
(cl_int)size[1],
|
||||
(cl_int)size[2],
|
||||
(cl_int)CopyRectAlignment[i] };
|
||||
cl_ulong copySize[4] = { size[0], size[1], size[2], CopyRectAlignment[i] };
|
||||
setArgument(kernels_[blitType], 4, sizeof(copySize), copySize);
|
||||
|
||||
// Create ND range object for the kernel's execution
|
||||
@@ -2426,7 +2423,7 @@ KernelBlitManager::fillBuffer(
|
||||
else {
|
||||
uint fillType = FillBuffer;
|
||||
size_t globalWorkOffset[3] = { 0, 0, 0 };
|
||||
cl_int fillSize = size[0] / patternSize;
|
||||
cl_ulong fillSize = size[0] / patternSize;
|
||||
size_t globalWorkSize = amd::alignUp(fillSize, 256);
|
||||
size_t localWorkSize = 256;
|
||||
bool dwordAligned =
|
||||
@@ -2450,7 +2447,7 @@ KernelBlitManager::fillBuffer(
|
||||
memcpy(constBuf, pattern, patternSize);
|
||||
gpuCB->unmap(&gpu());
|
||||
setArgument(kernels_[fillType], 2, sizeof(cl_mem), &gpuCB);
|
||||
cl_int offset = origin[0];
|
||||
cl_ulong offset = origin[0];
|
||||
if (dwordAligned) {
|
||||
patternSize /= sizeof(uint32_t);
|
||||
offset /= sizeof(uint32_t);
|
||||
@@ -2528,14 +2525,14 @@ KernelBlitManager::copyBuffer(
|
||||
mem = &gpuMem(dstMemory);
|
||||
setArgument(kernels_[blitType], 1, sizeof(cl_mem), &mem);
|
||||
// Program source origin
|
||||
cl_int srcOffset = srcOrigin[0] / CopyBuffAlignment[i];;
|
||||
cl_ulong srcOffset = srcOrigin[0] / CopyBuffAlignment[i];;
|
||||
setArgument(kernels_[blitType], 2, sizeof(srcOffset), &srcOffset);
|
||||
|
||||
// Program destinaiton origin
|
||||
cl_int dstOffset = dstOrigin[0] / CopyBuffAlignment[i];;
|
||||
cl_ulong dstOffset = dstOrigin[0] / CopyBuffAlignment[i];;
|
||||
setArgument(kernels_[blitType], 3, sizeof(dstOffset), &dstOffset);
|
||||
|
||||
cl_int copySize = size[0];
|
||||
cl_ulong copySize = size[0];
|
||||
setArgument(kernels_[blitType], 4, sizeof(copySize), ©Size);
|
||||
|
||||
if (blitType == BlitCopyBufferAligned) {
|
||||
|
||||
@@ -320,7 +320,12 @@ Settings::create(
|
||||
supportDepthsRGB_ = true;
|
||||
}
|
||||
if (use64BitPtr_) {
|
||||
maxAllocSize_ = 4048 * Mi;
|
||||
if (GPU_ENABLE_LARGE_ALLOCATION) {
|
||||
maxAllocSize_ = 16ULL * Gi;
|
||||
}
|
||||
else {
|
||||
maxAllocSize_ = 4048 * Mi;
|
||||
}
|
||||
}
|
||||
else {
|
||||
maxAllocSize_ = 3ULL * Gi;
|
||||
|
||||
@@ -566,7 +566,7 @@ HsaBlitManager::importExportImage(
|
||||
|
||||
static void
|
||||
CalcRowSlicePitches(
|
||||
cl_int* pitch, const cl_int* copySize,
|
||||
cl_ulong* pitch, const cl_int* copySize,
|
||||
size_t rowPitch, size_t slicePitch, const Memory& mem)
|
||||
{
|
||||
const oclhsa::Image &hsaImage = static_cast< const oclhsa::Image &>(mem);
|
||||
@@ -1067,14 +1067,14 @@ KernelBlitManager::copyBuffer(
|
||||
clmem = ((cl_mem) as_cl<amd::Memory>(dstMemory.owner()));
|
||||
kernels_[blitType]->parameters().set(1, sizeof(cl_mem), &clmem);
|
||||
// Program source origin
|
||||
cl_int srcOffset = srcOrigin[0] / CopyBuffAlignment[i];
|
||||
cl_ulong srcOffset = srcOrigin[0] / CopyBuffAlignment[i];
|
||||
kernels_[blitType]->parameters().set(2, sizeof(srcOffset), &srcOffset);
|
||||
|
||||
// Program destinaiton origin
|
||||
cl_int dstOffset = dstOrigin[0] / CopyBuffAlignment[i];
|
||||
cl_ulong dstOffset = dstOrigin[0] / CopyBuffAlignment[i];
|
||||
kernels_[blitType]->parameters().set(3, sizeof(dstOffset), &dstOffset);
|
||||
|
||||
cl_int copySize = size[0];
|
||||
cl_ulong copySize = size[0];
|
||||
kernels_[blitType]->parameters().set(4, sizeof(copySize), ©Size);
|
||||
|
||||
if (blitType == BlitCopyBufferAligned) {
|
||||
@@ -1187,18 +1187,18 @@ KernelBlitManager::copyBufferRect(
|
||||
kernels_[blitType]->parameters().set(0, sizeof(cl_mem), &clmem);
|
||||
clmem = ((cl_mem) as_cl<amd::Memory>(dstMemory.owner()));
|
||||
kernels_[blitType]->parameters().set(1, sizeof(cl_mem), &clmem);
|
||||
cl_uint src[4] = { (cl_uint)srcRect.rowPitch_,
|
||||
(cl_uint)srcRect.slicePitch_,
|
||||
(cl_uint)srcRect.start_, 0 };
|
||||
cl_ulong src[4] = { srcRect.rowPitch_,
|
||||
srcRect.slicePitch_,
|
||||
srcRect.start_, 0 };
|
||||
kernels_[blitType]->parameters().set(2, sizeof(src), src);
|
||||
cl_uint dst[4] = { (cl_uint)dstRect.rowPitch_,
|
||||
(cl_uint)dstRect.slicePitch_,
|
||||
(cl_uint)dstRect.start_, 0 };
|
||||
cl_ulong dst[4] = { dstRect.rowPitch_,
|
||||
dstRect.slicePitch_,
|
||||
dstRect.start_, 0 };
|
||||
kernels_[blitType]->parameters().set(3, sizeof(dst), dst);
|
||||
cl_int copySize[4] = { (cl_int)size[0],
|
||||
(cl_int)size[1],
|
||||
(cl_int)size[2],
|
||||
(cl_int)CopyRectAlignment[i] };
|
||||
cl_ulong copySize[4] = { size[0],
|
||||
size[1],
|
||||
size[2],
|
||||
CopyRectAlignment[i] };
|
||||
kernels_[blitType]->parameters().set(4, sizeof(copySize), copySize);
|
||||
|
||||
// Create ND range object for the kernel's execution
|
||||
@@ -1311,23 +1311,23 @@ KernelBlitManager::copyImageToBuffer(
|
||||
granularity = 4;
|
||||
}
|
||||
CondLog(((dstOrigin[0] % granularity) != 0), "Unaligned offset in blit!");
|
||||
cl_int dstOrg[4] = { (cl_int)dstOrigin[0] / granularity,
|
||||
(cl_int)dstOrigin[1],
|
||||
(cl_int)dstOrigin[2],
|
||||
0 };
|
||||
cl_ulong dstOrg[4] = { dstOrigin[0] / granularity,
|
||||
dstOrigin[1],
|
||||
dstOrigin[2],
|
||||
0 };
|
||||
kernels_[blitType]->parameters().set(5, sizeof(dstOrg), dstOrg);
|
||||
kernels_[blitType]->parameters().set(6, sizeof(copySize), copySize);
|
||||
|
||||
// Program memory format
|
||||
uint multiplier = elementSize / sizeof(uint32_t);
|
||||
multiplier = (multiplier == 0) ? 1 : multiplier;
|
||||
cl_int format[4] = { (cl_int)numChannels,
|
||||
(cl_int)elementSize / (cl_int)numChannels,
|
||||
(cl_int)multiplier, 0 };
|
||||
cl_uint format[4] = { (cl_uint)numChannels,
|
||||
(cl_uint)(elementSize / numChannels),
|
||||
multiplier, 0 };
|
||||
kernels_[blitType]->parameters().set(7, sizeof(format), format);
|
||||
|
||||
// Program row and slice pitches
|
||||
cl_int pitch[4] = { 0 };
|
||||
cl_ulong pitch[4] = { 0 };
|
||||
CalcRowSlicePitches(pitch, copySize, rowPitch, slicePitch, srcImage);
|
||||
kernels_[blitType]->parameters().set(8, sizeof(pitch), pitch);
|
||||
|
||||
@@ -1429,9 +1429,9 @@ KernelBlitManager::copyBufferToImage(
|
||||
granularity = 4;
|
||||
}
|
||||
CondLog(((srcOrigin[0] % granularity) != 0), "Unaligned offset in blit!");
|
||||
cl_int srcOrg[4] = { (cl_int)srcOrigin[0] / granularity,
|
||||
(cl_int)srcOrigin[1],
|
||||
(cl_int)srcOrigin[2], 0 };
|
||||
cl_ulong srcOrg[4] = { srcOrigin[0] / granularity,
|
||||
srcOrigin[1],
|
||||
srcOrigin[2], 0 };
|
||||
kernels_[blitType]->parameters().set(2, sizeof(srcOrg), srcOrg);
|
||||
|
||||
cl_int dstOrg[4] = { (cl_int)dstOrigin[0],
|
||||
@@ -1447,13 +1447,13 @@ KernelBlitManager::copyBufferToImage(
|
||||
// Program memory format
|
||||
uint multiplier = elementSize / sizeof(uint32_t);
|
||||
multiplier = (multiplier == 0) ? 1 : multiplier;
|
||||
cl_int format[4] = { (cl_int)numChannels,
|
||||
(cl_int)elementSize / (cl_int)numChannels,
|
||||
(cl_int)multiplier, 0 };
|
||||
cl_uint format[4] = { (cl_uint)numChannels,
|
||||
(cl_uint)(elementSize / numChannels),
|
||||
multiplier, 0 };
|
||||
kernels_[blitType]->parameters().set(5, sizeof(format), format);
|
||||
|
||||
// Program row and slice pitches
|
||||
cl_int pitch[4] = { 0 };
|
||||
cl_ulong pitch[4] = { 0 };
|
||||
CalcRowSlicePitches(pitch, copySize, rowPitch, slicePitch, dstImage);
|
||||
kernels_[blitType]->parameters().set(6, sizeof(pitch), pitch);
|
||||
|
||||
@@ -1617,7 +1617,7 @@ KernelBlitManager::fillBuffer(
|
||||
|
||||
uint fillType = FillBuffer;
|
||||
size_t globalWorkOffset[3] = { 0, 0, 0 };
|
||||
cl_int fillSize = size[0] / patternSize;
|
||||
cl_ulong fillSize = size[0] / patternSize;
|
||||
size_t globalWorkSize = amd::alignUp(fillSize, 256);
|
||||
size_t localWorkSize = 256;
|
||||
bool dwordAligned =
|
||||
@@ -1650,14 +1650,14 @@ KernelBlitManager::fillBuffer(
|
||||
|
||||
cl_mem clmem = ((cl_mem) as_cl<amd::Memory>(fillMemory));
|
||||
kernels_[fillType]->parameters().set(2, sizeof(cl_mem), &clmem);
|
||||
cl_int offset = origin[0];
|
||||
cl_ulong offset = origin[0];
|
||||
if (dwordAligned) {
|
||||
patternSize /= sizeof(uint32_t);
|
||||
offset /= sizeof(uint32_t);
|
||||
}
|
||||
kernels_[fillType]->parameters().set(3, sizeof(cl_uint), &patternSize);
|
||||
kernels_[fillType]->parameters().set(4, sizeof(offset), &offset);
|
||||
kernels_[fillType]->parameters().set(5, sizeof(cl_mem), &fillSize);
|
||||
kernels_[fillType]->parameters().set(5, sizeof(fillSize), &fillSize);
|
||||
|
||||
// Create ND range object for the kernel's execution
|
||||
amd::NDRangeContainer ndrange(1,
|
||||
|
||||
@@ -152,6 +152,8 @@ release(bool, GPU_DIRECT_SRD, false, \
|
||||
"Use indirect SRD access in HSAIL") \
|
||||
release(bool, GPU_USE_DEVICE_QUEUE, false, \
|
||||
"Use a dedicated device queue for the actual submissions") \
|
||||
release(bool, GPU_ENABLE_LARGE_ALLOCATION, false, \
|
||||
"Enable >4GB single allocations") \
|
||||
release(bool, AMD_DEPTH_MSAA_INTEROP, false, \
|
||||
"Enable depth stencil and MSAA buffer interop") \
|
||||
release(bool, AMD_THREAD_TRACE_ENABLE, false, \
|
||||
|
||||
Ссылка в новой задаче
Block a user