P4 to Git Change 1077444 by gandryey@gera-dev-w7 on 2014/09/16 14:31:35

ECR #304775 - Add capability to enable large allocations >4GB
	- Update the blit kernels to consider a buffer size >4GB

Affected files ...

... //depot/stg/opencl/drivers/opencl/runtime/device/blitcl.cpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpublit.cpp#110 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusettings.cpp#280 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsablit.cpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#214 edit
Этот коммит содержится в:
foreman
2014-09-16 14:43:17 -04:00
родитель ff7ab4a0b2
Коммит b672b6c4da
5 изменённых файлов: 145 добавлений и 147 удалений
+66 -72
Просмотреть файл
@@ -11,13 +11,13 @@ const char* BlitSourceCode = BLIT_KERNELS(
__kernel void copyBufferToImage(
__global uint* src,
__write_only image2d_array_t dst,
int4 srcOrigin,
ulong4 srcOrigin,
int4 dstOrigin,
int4 size,
int4 format,
int4 pitch)
uint4 format,
ulong4 pitch)
{
uint idxSrc;
ulong idxSrc;
int4 coordsDst;
uint4 pixel;
__global uint* srcUInt = src;
@@ -49,61 +49,55 @@ __kernel void copyBufferToImage(
switch (format.x) {
case 1:
// Check size
switch (format.y) {
case 1:
if (format.y == 1) {
pixel.x = (uint)srcUChar[idxSrc];
break;
case 2:
}
else if (format.y == 2) {
pixel.x = (uint)srcUShort[idxSrc];
break;
case 4:
}
else {
pixel.x = srcUInt[idxSrc];
break;
}
break;
case 2:
// Check size
switch (format.y) {
case 1:
if (format.y == 1) {
tmpUShort = srcUShort[idxSrc];
pixel.x = (uint)(tmpUShort & 0xff);
pixel.y = (uint)(tmpUShort >> 8);
break;
case 2:
}
else if (format.y == 2) {
tmpUInt = srcUInt[idxSrc];
pixel.x = (tmpUInt & 0xffff);
pixel.y = (tmpUInt >> 16);
break;
case 4:
}
else {
pixel.x = srcUInt[idxSrc++];
pixel.y = srcUInt[idxSrc];
break;
}
break;
case 4:
// Check size
switch (format.y) {
case 1:
if (format.y == 1) {
tmpUInt = srcUInt[idxSrc];
pixel.x = tmpUInt & 0xff;
pixel.y = (tmpUInt >> 8) & 0xff;
pixel.z = (tmpUInt >> 16) & 0xff;
pixel.w = (tmpUInt >> 24) & 0xff;
break;
case 2:
}
else if (format.y == 2) {
tmpUInt = srcUInt[idxSrc++];
pixel.x = tmpUInt & 0xffff;
pixel.y = (tmpUInt >> 16);
tmpUInt = srcUInt[idxSrc];
pixel.z = tmpUInt & 0xffff;
pixel.w = (tmpUInt >> 16);
break;
case 4:
}
else {
pixel.x = srcUInt[idxSrc++];
pixel.y = srcUInt[idxSrc++];
pixel.z = srcUInt[idxSrc++];
pixel.w = srcUInt[idxSrc];
break;
}
break;
}
@@ -117,12 +111,12 @@ __kernel void copyImageToBuffer(
__global ushort* dstUShort,
__global uchar* dstUChar,
int4 srcOrigin,
int4 dstOrigin,
ulong4 dstOrigin,
int4 size,
int4 format,
int4 pitch)
uint4 format,
ulong4 pitch)
{
uint idxDst;
ulong idxDst;
int4 coordsSrc;
uint4 texel;
@@ -205,12 +199,12 @@ __kernel void copyImageToBuffer(
__kernel void copyImage(
__read_only image2d_array_t src,
__write_only image2d_array_t dst,
int4 srcOrigin,
int4 dstOrigin,
int4 size)
int4 srcOrigin,
int4 dstOrigin,
int4 size)
{
int4 coordsDst;
int4 coordsSrc;
int4 coordsDst;
int4 coordsSrc;
coordsDst.x = get_global_id(0);
coordsDst.y = get_global_id(1);
@@ -234,12 +228,12 @@ __kernel void copyImage(
__kernel void copyImage1DA(
__read_only image2d_array_t src,
__write_only image2d_array_t dst,
int4 srcOrigin,
int4 dstOrigin,
int4 size)
int4 srcOrigin,
int4 dstOrigin,
int4 size)
{
int4 coordsDst;
int4 coordsSrc;
int4 coordsDst;
int4 coordsSrc;
coordsDst.x = get_global_id(0);
coordsDst.y = get_global_id(1);
@@ -271,13 +265,13 @@ __kernel void copyImage1DA(
__kernel void copyBufferRect(
__global uchar* src,
__global uchar* dst,
uint4 srcRect,
uint4 dstRect,
uint4 size)
ulong4 srcRect,
ulong4 dstRect,
ulong4 size)
{
uint x = (uint)get_global_id(0);
uint y = (uint)get_global_id(1);
uint z = (uint)get_global_id(2);
ulong x = get_global_id(0);
ulong y = get_global_id(1);
ulong z = get_global_id(2);
if ((x >= size.x) ||
(y >= size.y) ||
@@ -285,8 +279,8 @@ __kernel void copyBufferRect(
return;
}
uint offsSrc = srcRect.z + x + y * srcRect.x + z * srcRect.y;
uint offsDst = dstRect.z + x + y * dstRect.x + z * dstRect.y;
ulong offsSrc = srcRect.z + x + y * srcRect.x + z * srcRect.y;
ulong offsDst = dstRect.z + x + y * dstRect.x + z * dstRect.y;
dst[offsDst] = src[offsSrc];
}
@@ -294,13 +288,13 @@ __kernel void copyBufferRect(
__kernel void copyBufferRectAligned(
__global uint* src,
__global uint* dst,
uint4 srcRect,
uint4 dstRect,
uint4 size)
ulong4 srcRect,
ulong4 dstRect,
ulong4 size)
{
uint x = (uint)get_global_id(0);
uint y = (uint)get_global_id(1);
uint z = (uint)get_global_id(2);
ulong x = get_global_id(0);
ulong y = get_global_id(1);
ulong z = get_global_id(2);
if ((x >= size.x) ||
(y >= size.y) ||
@@ -308,8 +302,8 @@ __kernel void copyBufferRectAligned(
return;
}
uint offsSrc = srcRect.z + x + y * srcRect.x + z * srcRect.y;
uint offsDst = dstRect.z + x + y * dstRect.x + z * dstRect.y;
ulong offsSrc = srcRect.z + x + y * srcRect.x + z * srcRect.y;
ulong offsDst = dstRect.z + x + y * dstRect.x + z * dstRect.y;
if (size.w == 16) {
__global uint4* src4 = (__global uint4*)src;
@@ -324,18 +318,18 @@ __kernel void copyBufferRectAligned(
__kernel void copyBuffer(
__global uchar* src,
__global uchar* dst,
int srcOrigin,
int dstOrigin,
uint size)
ulong srcOrigin,
ulong dstOrigin,
ulong size)
{
uint id = (uint)get_global_id(0);
ulong id = get_global_id(0);
if (id >= size) {
return;
}
uint offsSrc = id + srcOrigin;
uint offsDst = id + dstOrigin;
ulong offsSrc = id + srcOrigin;
ulong offsDst = id + dstOrigin;
dst[offsDst] = src[offsSrc];
}
@@ -343,19 +337,19 @@ __kernel void copyBuffer(
__kernel void copyBufferAligned(
__global uint* src,
__global uint* dst,
int srcOrigin,
int dstOrigin,
uint size,
uint alignment)
ulong srcOrigin,
ulong dstOrigin,
ulong size,
uint alignment)
{
uint id = (uint)get_global_id(0);
ulong id = get_global_id(0);
if (id >= size) {
return;
}
uint offsSrc = id + srcOrigin;
uint offsDst = id + dstOrigin;
ulong offsSrc = id + srcOrigin;
ulong offsDst = id + dstOrigin;
if (alignment == 16) {
__global uint4* src4 = (__global uint4*)src;
@@ -371,11 +365,11 @@ __kernel void fillBuffer(
__global uchar* bufUChar,
__global uint* bufUInt,
__constant uchar* pattern,
uint patternSize,
uint offset,
uint size)
uint patternSize,
ulong offset,
ulong size)
{
uint id = (uint)get_global_id(0);
ulong id = get_global_id(0);
if (id >= size) {
return;
+39 -42
Просмотреть файл
@@ -1140,7 +1140,7 @@ KernelBlitManager::copyBufferToImage(
void
CalcRowSlicePitches(
cl_int* pitch, const cl_int* copySize,
cl_ulong* pitch, const cl_int* copySize,
size_t rowPitch, size_t slicePitch, const Memory& mem)
{
size_t memFmtSize = memoryFormatSize(mem.cal()->format_).size_;
@@ -1317,17 +1317,17 @@ KernelBlitManager::copyBufferToImageKernel(
granularity = 4;
}
CondLog(((srcOrigin[0] % granularity) != 0), "Unaligned offset in blit!");
cl_int srcOrg[4] = { (cl_int)srcOrigin[0] / granularity,
(cl_int)srcOrigin[1],
(cl_int)srcOrigin[2], 0 };
cl_ulong srcOrg[4] = { srcOrigin[0] / granularity,
srcOrigin[1],
srcOrigin[2], 0 };
setArgument(kernels_[blitType], 2, sizeof(srcOrg), srcOrg);
cl_int dstOrg[4] = { (cl_int)dstOrigin[0],
(cl_int)dstOrigin[1],
(cl_int)dstOrigin[2], 0 };
cl_int copySize[4] = { (cl_int)size[0],
(cl_int)size[1],
(cl_int)size[2], 0 };
cl_int dstOrg[4] = { (cl_int)dstOrigin[0],
(cl_int)dstOrigin[1],
(cl_int)dstOrigin[2], 0 };
cl_int copySize[4] = { (cl_int)size[0],
(cl_int)size[1],
(cl_int)size[2], 0 };
if (swapLayer) {
dstOrg[2] = dstOrg[1];
dstOrg[1] = 0;
@@ -1340,13 +1340,13 @@ KernelBlitManager::copyBufferToImageKernel(
// Program memory format
uint multiplier = memFmt.size_ / sizeof(uint32_t);
multiplier = (multiplier == 0) ? 1 : multiplier;
cl_int format[4] = { (cl_int)memFmt.components_,
(cl_int)memFmt.size_ / (cl_int)memFmt.components_,
(cl_int)multiplier, 0 };
cl_uint format[4] = { memFmt.components_,
memFmt.size_ / memFmt.components_,
multiplier, 0 };
setArgument(kernels_[blitType], 5, sizeof(format), format);
// Program row and slice pitches
cl_int pitch[4] = { 0 };
cl_ulong pitch[4] = { 0 };
CalcRowSlicePitches(pitch, copySize, rowPitch, slicePitch, gpuMem(dstMemory));
setArgument(kernels_[blitType], 6, sizeof(pitch), pitch);
@@ -1661,12 +1661,12 @@ KernelBlitManager::copyImageToBufferKernel(
setArgument(kernels_[blitType], 2, sizeof(cl_mem), &mem);
setArgument(kernels_[blitType], 3, sizeof(cl_mem), &mem);
cl_int srcOrg[4] = { (cl_int)srcOrigin[0],
(cl_int)srcOrigin[1],
(cl_int)srcOrigin[2], 0 };
cl_int copySize[4] = { (cl_int)size[0],
(cl_int)size[1],
(cl_int)size[2], 0 };
cl_int srcOrg[4] = { (cl_int)srcOrigin[0],
(cl_int)srcOrigin[1],
(cl_int)srcOrigin[2], 0 };
cl_int copySize[4] = { (cl_int)size[0],
(cl_int)size[1],
(cl_int)size[2], 0 };
if (swapLayer) {
srcOrg[2] = srcOrg[1];
srcOrg[1] = 0;
@@ -1685,22 +1685,22 @@ KernelBlitManager::copyImageToBufferKernel(
granularity = 4;
}
CondLog(((dstOrigin[0] % granularity) != 0), "Unaligned offset in blit!");
cl_int dstOrg[4] = { (cl_int)dstOrigin[0] / granularity,
(cl_int)dstOrigin[1],
(cl_int)dstOrigin[2], 0 };
cl_ulong dstOrg[4] = { dstOrigin[0] / granularity,
dstOrigin[1],
dstOrigin[2], 0 };
setArgument(kernels_[blitType], 5, sizeof(dstOrg), dstOrg);
setArgument(kernels_[blitType], 6, sizeof(copySize), copySize);
// Program memory format
uint multiplier = memFmt.size_ / sizeof(uint32_t);
multiplier = (multiplier == 0) ? 1 : multiplier;
cl_int format[4] = { (cl_int)memFmt.components_,
(cl_int)memFmt.size_ / (cl_int)memFmt.components_,
(cl_int)multiplier, 0 };
cl_uint format[4] = { memFmt.components_,
memFmt.size_ / memFmt.components_,
multiplier, 0 };
setArgument(kernels_[blitType], 7, sizeof(format), format);
// Program row and slice pitches
cl_int pitch[4] = { 0 };
cl_ulong pitch[4] = { 0 };
CalcRowSlicePitches(pitch, copySize, rowPitch, slicePitch, gpuMem(srcMemory));
setArgument(kernels_[blitType], 8, sizeof(pitch), pitch);
@@ -2130,18 +2130,15 @@ KernelBlitManager::copyBufferRect(
setArgument(kernels_[blitType], 0, sizeof(cl_mem), &mem);
mem = &gpuMem(dstMemory);
setArgument(kernels_[blitType], 1, sizeof(cl_mem), &mem);
cl_uint src[4] = { (cl_uint)srcRect.rowPitch_,
(cl_uint)srcRect.slicePitch_,
(cl_uint)srcRect.start_, 0 };
cl_ulong src[4] = { srcRect.rowPitch_,
srcRect.slicePitch_,
srcRect.start_, 0 };
setArgument(kernels_[blitType], 2, sizeof(src), src);
cl_uint dst[4] = { (cl_uint)dstRect.rowPitch_,
(cl_uint)dstRect.slicePitch_,
(cl_uint)dstRect.start_, 0 };
cl_ulong dst[4] = { dstRect.rowPitch_,
dstRect.slicePitch_,
dstRect.start_, 0 };
setArgument(kernels_[blitType], 3, sizeof(dst), dst);
cl_int copySize[4] = { (cl_int)size[0],
(cl_int)size[1],
(cl_int)size[2],
(cl_int)CopyRectAlignment[i] };
cl_ulong copySize[4] = { size[0], size[1], size[2], CopyRectAlignment[i] };
setArgument(kernels_[blitType], 4, sizeof(copySize), copySize);
// Create ND range object for the kernel's execution
@@ -2426,7 +2423,7 @@ KernelBlitManager::fillBuffer(
else {
uint fillType = FillBuffer;
size_t globalWorkOffset[3] = { 0, 0, 0 };
cl_int fillSize = size[0] / patternSize;
cl_ulong fillSize = size[0] / patternSize;
size_t globalWorkSize = amd::alignUp(fillSize, 256);
size_t localWorkSize = 256;
bool dwordAligned =
@@ -2450,7 +2447,7 @@ KernelBlitManager::fillBuffer(
memcpy(constBuf, pattern, patternSize);
gpuCB->unmap(&gpu());
setArgument(kernels_[fillType], 2, sizeof(cl_mem), &gpuCB);
cl_int offset = origin[0];
cl_ulong offset = origin[0];
if (dwordAligned) {
patternSize /= sizeof(uint32_t);
offset /= sizeof(uint32_t);
@@ -2528,14 +2525,14 @@ KernelBlitManager::copyBuffer(
mem = &gpuMem(dstMemory);
setArgument(kernels_[blitType], 1, sizeof(cl_mem), &mem);
// Program source origin
cl_int srcOffset = srcOrigin[0] / CopyBuffAlignment[i];;
cl_ulong srcOffset = srcOrigin[0] / CopyBuffAlignment[i];;
setArgument(kernels_[blitType], 2, sizeof(srcOffset), &srcOffset);
// Program destinaiton origin
cl_int dstOffset = dstOrigin[0] / CopyBuffAlignment[i];;
cl_ulong dstOffset = dstOrigin[0] / CopyBuffAlignment[i];;
setArgument(kernels_[blitType], 3, sizeof(dstOffset), &dstOffset);
cl_int copySize = size[0];
cl_ulong copySize = size[0];
setArgument(kernels_[blitType], 4, sizeof(copySize), &copySize);
if (blitType == BlitCopyBufferAligned) {
+6 -1
Просмотреть файл
@@ -320,7 +320,12 @@ Settings::create(
supportDepthsRGB_ = true;
}
if (use64BitPtr_) {
maxAllocSize_ = 4048 * Mi;
if (GPU_ENABLE_LARGE_ALLOCATION) {
maxAllocSize_ = 16ULL * Gi;
}
else {
maxAllocSize_ = 4048 * Mi;
}
}
else {
maxAllocSize_ = 3ULL * Gi;
+32 -32
Просмотреть файл
@@ -566,7 +566,7 @@ HsaBlitManager::importExportImage(
static void
CalcRowSlicePitches(
cl_int* pitch, const cl_int* copySize,
cl_ulong* pitch, const cl_int* copySize,
size_t rowPitch, size_t slicePitch, const Memory& mem)
{
const oclhsa::Image &hsaImage = static_cast< const oclhsa::Image &>(mem);
@@ -1067,14 +1067,14 @@ KernelBlitManager::copyBuffer(
clmem = ((cl_mem) as_cl<amd::Memory>(dstMemory.owner()));
kernels_[blitType]->parameters().set(1, sizeof(cl_mem), &clmem);
// Program source origin
cl_int srcOffset = srcOrigin[0] / CopyBuffAlignment[i];
cl_ulong srcOffset = srcOrigin[0] / CopyBuffAlignment[i];
kernels_[blitType]->parameters().set(2, sizeof(srcOffset), &srcOffset);
// Program destinaiton origin
cl_int dstOffset = dstOrigin[0] / CopyBuffAlignment[i];
cl_ulong dstOffset = dstOrigin[0] / CopyBuffAlignment[i];
kernels_[blitType]->parameters().set(3, sizeof(dstOffset), &dstOffset);
cl_int copySize = size[0];
cl_ulong copySize = size[0];
kernels_[blitType]->parameters().set(4, sizeof(copySize), &copySize);
if (blitType == BlitCopyBufferAligned) {
@@ -1187,18 +1187,18 @@ KernelBlitManager::copyBufferRect(
kernels_[blitType]->parameters().set(0, sizeof(cl_mem), &clmem);
clmem = ((cl_mem) as_cl<amd::Memory>(dstMemory.owner()));
kernels_[blitType]->parameters().set(1, sizeof(cl_mem), &clmem);
cl_uint src[4] = { (cl_uint)srcRect.rowPitch_,
(cl_uint)srcRect.slicePitch_,
(cl_uint)srcRect.start_, 0 };
cl_ulong src[4] = { srcRect.rowPitch_,
srcRect.slicePitch_,
srcRect.start_, 0 };
kernels_[blitType]->parameters().set(2, sizeof(src), src);
cl_uint dst[4] = { (cl_uint)dstRect.rowPitch_,
(cl_uint)dstRect.slicePitch_,
(cl_uint)dstRect.start_, 0 };
cl_ulong dst[4] = { dstRect.rowPitch_,
dstRect.slicePitch_,
dstRect.start_, 0 };
kernels_[blitType]->parameters().set(3, sizeof(dst), dst);
cl_int copySize[4] = { (cl_int)size[0],
(cl_int)size[1],
(cl_int)size[2],
(cl_int)CopyRectAlignment[i] };
cl_ulong copySize[4] = { size[0],
size[1],
size[2],
CopyRectAlignment[i] };
kernels_[blitType]->parameters().set(4, sizeof(copySize), copySize);
// Create ND range object for the kernel's execution
@@ -1311,23 +1311,23 @@ KernelBlitManager::copyImageToBuffer(
granularity = 4;
}
CondLog(((dstOrigin[0] % granularity) != 0), "Unaligned offset in blit!");
cl_int dstOrg[4] = { (cl_int)dstOrigin[0] / granularity,
(cl_int)dstOrigin[1],
(cl_int)dstOrigin[2],
0 };
cl_ulong dstOrg[4] = { dstOrigin[0] / granularity,
dstOrigin[1],
dstOrigin[2],
0 };
kernels_[blitType]->parameters().set(5, sizeof(dstOrg), dstOrg);
kernels_[blitType]->parameters().set(6, sizeof(copySize), copySize);
// Program memory format
uint multiplier = elementSize / sizeof(uint32_t);
multiplier = (multiplier == 0) ? 1 : multiplier;
cl_int format[4] = { (cl_int)numChannels,
(cl_int)elementSize / (cl_int)numChannels,
(cl_int)multiplier, 0 };
cl_uint format[4] = { (cl_uint)numChannels,
(cl_uint)(elementSize / numChannels),
multiplier, 0 };
kernels_[blitType]->parameters().set(7, sizeof(format), format);
// Program row and slice pitches
cl_int pitch[4] = { 0 };
cl_ulong pitch[4] = { 0 };
CalcRowSlicePitches(pitch, copySize, rowPitch, slicePitch, srcImage);
kernels_[blitType]->parameters().set(8, sizeof(pitch), pitch);
@@ -1429,9 +1429,9 @@ KernelBlitManager::copyBufferToImage(
granularity = 4;
}
CondLog(((srcOrigin[0] % granularity) != 0), "Unaligned offset in blit!");
cl_int srcOrg[4] = { (cl_int)srcOrigin[0] / granularity,
(cl_int)srcOrigin[1],
(cl_int)srcOrigin[2], 0 };
cl_ulong srcOrg[4] = { srcOrigin[0] / granularity,
srcOrigin[1],
srcOrigin[2], 0 };
kernels_[blitType]->parameters().set(2, sizeof(srcOrg), srcOrg);
cl_int dstOrg[4] = { (cl_int)dstOrigin[0],
@@ -1447,13 +1447,13 @@ KernelBlitManager::copyBufferToImage(
// Program memory format
uint multiplier = elementSize / sizeof(uint32_t);
multiplier = (multiplier == 0) ? 1 : multiplier;
cl_int format[4] = { (cl_int)numChannels,
(cl_int)elementSize / (cl_int)numChannels,
(cl_int)multiplier, 0 };
cl_uint format[4] = { (cl_uint)numChannels,
(cl_uint)(elementSize / numChannels),
multiplier, 0 };
kernels_[blitType]->parameters().set(5, sizeof(format), format);
// Program row and slice pitches
cl_int pitch[4] = { 0 };
cl_ulong pitch[4] = { 0 };
CalcRowSlicePitches(pitch, copySize, rowPitch, slicePitch, dstImage);
kernels_[blitType]->parameters().set(6, sizeof(pitch), pitch);
@@ -1617,7 +1617,7 @@ KernelBlitManager::fillBuffer(
uint fillType = FillBuffer;
size_t globalWorkOffset[3] = { 0, 0, 0 };
cl_int fillSize = size[0] / patternSize;
cl_ulong fillSize = size[0] / patternSize;
size_t globalWorkSize = amd::alignUp(fillSize, 256);
size_t localWorkSize = 256;
bool dwordAligned =
@@ -1650,14 +1650,14 @@ KernelBlitManager::fillBuffer(
cl_mem clmem = ((cl_mem) as_cl<amd::Memory>(fillMemory));
kernels_[fillType]->parameters().set(2, sizeof(cl_mem), &clmem);
cl_int offset = origin[0];
cl_ulong offset = origin[0];
if (dwordAligned) {
patternSize /= sizeof(uint32_t);
offset /= sizeof(uint32_t);
}
kernels_[fillType]->parameters().set(3, sizeof(cl_uint), &patternSize);
kernels_[fillType]->parameters().set(4, sizeof(offset), &offset);
kernels_[fillType]->parameters().set(5, sizeof(cl_mem), &fillSize);
kernels_[fillType]->parameters().set(5, sizeof(fillSize), &fillSize);
// Create ND range object for the kernel's execution
amd::NDRangeContainer ndrange(1,
+2
Просмотреть файл
@@ -152,6 +152,8 @@ release(bool, GPU_DIRECT_SRD, false, \
"Use indirect SRD access in HSAIL") \
release(bool, GPU_USE_DEVICE_QUEUE, false, \
"Use a dedicated device queue for the actual submissions") \
release(bool, GPU_ENABLE_LARGE_ALLOCATION, false, \
"Enable >4GB single allocations") \
release(bool, AMD_DEPTH_MSAA_INTEROP, false, \
"Enable depth stencil and MSAA buffer interop") \
release(bool, AMD_THREAD_TRACE_ENABLE, false, \