rocr: GFX9, GFX10, GFX11: Use view3dAs2dArray flag, for thick/3D swizzle modes. (#58)

The existing HSA_IMAGE_ENABLE_3D_SWIZZLE_DEBUG environment flag controls
this behavior. Its default value is false, which sets view3dAs2dArray = 1.
Setting the flag to true enables support for swizzle modes that do 3D
interleaving on GFX9, GFX10 and GFX11. By default, support for swizzle
modes that do 3D interleaving is disabled.

[ROCm/ROCR-Runtime commit: 0984a1f0fd]
Αυτή η υποβολή περιλαμβάνεται σε:
Khatri, Shweta
2025-02-26 09:38:17 -05:00
υποβλήθηκε από GitHub
γονέας 3b30b8a975
υποβολή 9816c2ecd3
4 αρχεία άλλαξαν με 114 προσθήκες και 8 διαγραφές
@@ -48,6 +48,7 @@
#include <algorithm>
#include <climits>
#include "core/inc/runtime.h"
#include "hsakmt/hsakmt.h"
#include "inc/hsa_ext_amd.h"
#include "core/inc/hsa_internal.h"
@@ -535,8 +536,34 @@ uint32_t ImageManagerAi::GetAddrlibSurfaceInfoAi(
in.resourceType = ADDR_RSRC_TEX_2D;
break;
case HSA_EXT_IMAGE_GEOMETRY_3D:
in.resourceType = ADDR_RSRC_TEX_3D;
break;
{
in.resourceType = ADDR_RSRC_TEX_3D;
/*
* 3D swizzle modes enforce alignment
* of the number of slices to the block depth.
* If numSlices = 3 then the 3 slices are
* interleaved for 3D locality among the 8 slices
* that make up each block. This causes the memory
* footprint to jump from an ideal size to 3x the size.
*
* 'enable3DSwizzleMode' flag tests for env variable
* HSA_IMAGE_ENABLE_3D_SWIZZLE_DEBUG to enable or disable
* 3D swizzle:
* true: Keep view3dAs2dArray = 0 for real 3D interleaving.
* false: Use view3dAs2dArray = 1 to avoid the alignment
* expansion.
* 2D swizzle modes can lower size overhead but may yield
* suboptimal cache behavior for fully 3D volumetric
* operations.
*/
bool enable3DSwizzleMode = core::Runtime::runtime_singleton_->flag().enable_3d_swizzle();
if (enable3DSwizzleMode)
in.flags.view3dAs2dArray = 0;
else
in.flags.view3dAs2dArray = 1;
break;
}
}
in.flags.texture = 1;
@@ -48,6 +48,7 @@
#include <algorithm>
#include <climits>
#include "core/inc/runtime.h"
#include "inc/hsa_ext_amd.h"
#include "core/inc/hsa_internal.h"
#include "addrlib/src/core/addrlib.h"
@@ -658,8 +659,34 @@ uint32_t ImageManagerGfx11::GetAddrlibSurfaceInfoNv(
break;
case HSA_EXT_IMAGE_GEOMETRY_3D:
in.resourceType = ADDR_RSRC_TEX_3D;
break;
{
in.resourceType = ADDR_RSRC_TEX_3D;
/*
* 3D swizzle modes enforce alignment
* of the number of slices to the block depth.
* If numSlices = 3 then the 3 slices are
* interleaved for 3D locality among the 8 slices
* that make up each block. This causes the memory
* footprint to jump from an ideal size to 3x the size.
*
* 'enable3DSwizzleMode' flag tests for env variable
* HSA_IMAGE_ENABLE_3D_SWIZZLE_DEBUG to enable or disable
* 3D swizzle:
* true: Keep view3dAs2dArray = 0 for real 3D interleaving.
* false: Use view3dAs2dArray = 1 to avoid the alignment
* expansion.
* 2D swizzle modes can lower size overhead but may yield
* suboptimal cache behavior for fully 3D volumetric
* operations.
*/
bool enable3DSwizzleMode = core::Runtime::runtime_singleton_->flag().enable_3d_swizzle();
if (enable3DSwizzleMode)
in.flags.view3dAs2dArray = 0;
else
in.flags.view3dAs2dArray = 1;
break;
}
}
in.flags.texture = 1;
@@ -48,6 +48,7 @@
#include <algorithm>
#include <climits>
#include "core/inc/runtime.h"
#include "hsakmt/hsakmt.h"
#include "inc/hsa_ext_amd.h"
#include "core/inc/hsa_internal.h"
@@ -858,8 +859,33 @@ bool ImageManagerKv::GetAddrlibSurfaceInfo(
case HSA_EXT_IMAGE_GEOMETRY_3D:
case HSA_EXT_IMAGE_GEOMETRY_2DA:
case HSA_EXT_IMAGE_GEOMETRY_2DADEPTH:
in.resourceType = ADDR_RSRC_TEX_3D;
break;
{
in.resourceType = ADDR_RSRC_TEX_3D;
/*
* 3D swizzle modes enforce alignment
* of the number of slices to the block depth.
* If numSlices = 3 then the 3 slices are
* interleaved for 3D locality among the 8 slices
* that make up each block. This causes the memory
* footprint to jump from an ideal size to 3x the size.
* 'enable3DSwizzleMode' flag tests for env variable
* HSA_IMAGE_ENABLE_3D_SWIZZLE_DEBUG to enable or disable
* 3D swizzle:
* true: Keep view3dAs2dArray = 0 for real 3D interleaving.
* false: Use view3dAs2dArray = 1 to avoid the alignment
* expansion.
* 2D swizzle modes can lower size overhead but may yield
* suboptimal cache behavior for fully 3D volumetric
* operations.
*/
bool enable3DSwizzleMode = core::Runtime::runtime_singleton_->flag().enable_3d_swizzle();
if (enable3DSwizzleMode)
in.flags.view3dAs2dArray = 0;
else
in.flags.view3dAs2dArray = 1;
break;
}
}
in.flags.texture = 1;
@@ -48,6 +48,7 @@
#include <algorithm>
#include <climits>
#include "core/inc/runtime.h"
#include "inc/hsa_ext_amd.h"
#include "core/inc/hsa_internal.h"
#include "addrlib/src/core/addrlib.h"
@@ -653,8 +654,33 @@ uint32_t ImageManagerNv::GetAddrlibSurfaceInfoNv(
break;
case HSA_EXT_IMAGE_GEOMETRY_3D:
in.resourceType = ADDR_RSRC_TEX_3D;
break;
{
in.resourceType = ADDR_RSRC_TEX_3D;
/*
* 3D swizzle modes enforce alignment
* of the number of slices to the block depth.
* If numSlices = 3 then the 3 slices are
* interleaved for 3D locality among the 8 slices
* that make up each block. This causes the memory
* footprint to jump from an ideal size to 3x the size.
* 'enable3DSwizzleMode' flag tests for env variable
* HSA_IMAGE_ENABLE_3D_SWIZZLE_DEBUG to enable or disable
* 3D swizzle:
* true: Keep view3dAs2dArray = 0 for real 3D interleaving.
* false: Use view3dAs2dArray = 1 to avoid the alignment
* expansion.
* 2D swizzle modes can lower size overhead but may yield
* suboptimal cache behavior for fully 3D volumetric
* operations.
*/
bool enable3DSwizzleMode = core::Runtime::runtime_singleton_->flag().enable_3d_swizzle();
if (enable3DSwizzleMode)
in.flags.view3dAs2dArray = 0;
else
in.flags.view3dAs2dArray = 1;
break;
}
}
in.flags.texture = 1;