diff --git a/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_ai.cpp b/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_ai.cpp index 3beefe586c..8a6ea1d6ea 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_ai.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_ai.cpp @@ -48,6 +48,7 @@ #include #include +#include "core/inc/runtime.h" #include "hsakmt/hsakmt.h" #include "inc/hsa_ext_amd.h" #include "core/inc/hsa_internal.h" @@ -535,8 +536,34 @@ uint32_t ImageManagerAi::GetAddrlibSurfaceInfoAi( in.resourceType = ADDR_RSRC_TEX_2D; break; case HSA_EXT_IMAGE_GEOMETRY_3D: - in.resourceType = ADDR_RSRC_TEX_3D; - break; + { + in.resourceType = ADDR_RSRC_TEX_3D; + /* + * 3D swizzle modes enforce alignment + * of the number of slices to the block depth. + * If numSlices = 3 then the 3 slices are + * interleaved for 3D locality among the 8 slices + * that make up each block. This causes the memory + * footprint to jump from an ideal size to 3x the size. + * + * 'enable3DSwizzleMode' flag tests for env variable + * HSA_IMAGE_ENABLE_3D_SWIZZLE_DEBUG to enable or disable + * 3D swizzle: + * true: Keep view3dAs2dArray = 0 for real 3D interleaving. + * false: Use view3dAs2dArray = 1 to avoid the alignment + * expansion. + * 2D swizzle modes can lower size overhead but may yield + * suboptimal cache behavior for fully 3D volumetric + * operations. 
+ */ + bool enable3DSwizzleMode = core::Runtime::runtime_singleton_->flag().enable_3d_swizzle(); + if (enable3DSwizzleMode) + in.flags.view3dAs2dArray = 0; + else + in.flags.view3dAs2dArray = 1; + + break; + } } in.flags.texture = 1; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_gfx11.cpp b/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_gfx11.cpp index ba70fa65b7..1c1727885c 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_gfx11.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_gfx11.cpp @@ -48,6 +48,7 @@ #include #include +#include "core/inc/runtime.h" #include "inc/hsa_ext_amd.h" #include "core/inc/hsa_internal.h" #include "addrlib/src/core/addrlib.h" @@ -658,8 +659,34 @@ uint32_t ImageManagerGfx11::GetAddrlibSurfaceInfoNv( break; case HSA_EXT_IMAGE_GEOMETRY_3D: - in.resourceType = ADDR_RSRC_TEX_3D; - break; + { + in.resourceType = ADDR_RSRC_TEX_3D; + /* + * 3D swizzle modes enforce alignment + * of the number of slices to the block depth. + * If numSlices = 3 then the 3 slices are + * interleaved for 3D locality among the 8 slices + * that make up each block. This causes the memory + * footprint to jump from an ideal size to 3x the size. + * + * 'enable3DSwizzleMode' flag tests for env variable + * HSA_IMAGE_ENABLE_3D_SWIZZLE_DEBUG to enable or disable + * 3D swizzle: + * true: Keep view3dAs2dArray = 0 for real 3D interleaving. + * false: Use view3dAs2dArray = 1 to avoid the alignment + * expansion. + * 2D swizzle modes can lower size overhead but may yield + * suboptimal cache behavior for fully 3D volumetric + * operations. 
+ */ + bool enable3DSwizzleMode = core::Runtime::runtime_singleton_->flag().enable_3d_swizzle(); + if (enable3DSwizzleMode) + in.flags.view3dAs2dArray = 0; + else + in.flags.view3dAs2dArray = 1; + + break; + } } in.flags.texture = 1; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_kv.cpp b/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_kv.cpp index 9b34b325f1..2521bc5e76 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_kv.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_kv.cpp @@ -48,6 +48,7 @@ #include #include +#include "core/inc/runtime.h" #include "hsakmt/hsakmt.h" #include "inc/hsa_ext_amd.h" #include "core/inc/hsa_internal.h" @@ -858,8 +859,33 @@ bool ImageManagerKv::GetAddrlibSurfaceInfo( case HSA_EXT_IMAGE_GEOMETRY_3D: case HSA_EXT_IMAGE_GEOMETRY_2DA: case HSA_EXT_IMAGE_GEOMETRY_2DADEPTH: - in.resourceType = ADDR_RSRC_TEX_3D; - break; + { + in.resourceType = ADDR_RSRC_TEX_3D; + /* + * 3D swizzle modes enforce alignment + * of the number of slices to the block depth. + * If numSlices = 3 then the 3 slices are + * interleaved for 3D locality among the 8 slices + * that make up each block. This causes the memory + * footprint to jump from an ideal size to 3x the size. + * 'enable3DSwizzleMode' flag tests for env variable + * HSA_IMAGE_ENABLE_3D_SWIZZLE_DEBUG to enable or disable + * 3D swizzle: + * true: Keep view3dAs2dArray = 0 for real 3D interleaving. + * false: Use view3dAs2dArray = 1 to avoid the alignment + * expansion. + * 2D swizzle modes can lower size overhead but may yield + * suboptimal cache behavior for fully 3D volumetric + * operations. 
+ */ + bool enable3DSwizzleMode = core::Runtime::runtime_singleton_->flag().enable_3d_swizzle(); + if (enable3DSwizzleMode) + in.flags.view3dAs2dArray = 0; + else + in.flags.view3dAs2dArray = 1; + + break; + } } in.flags.texture = 1; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_nv.cpp b/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_nv.cpp index 3675e3d626..64a73dd6c2 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_nv.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_nv.cpp @@ -48,6 +48,7 @@ #include #include +#include "core/inc/runtime.h" #include "inc/hsa_ext_amd.h" #include "core/inc/hsa_internal.h" #include "addrlib/src/core/addrlib.h" @@ -653,8 +654,33 @@ uint32_t ImageManagerNv::GetAddrlibSurfaceInfoNv( break; case HSA_EXT_IMAGE_GEOMETRY_3D: - in.resourceType = ADDR_RSRC_TEX_3D; - break; + { + in.resourceType = ADDR_RSRC_TEX_3D; + /* + * 3D swizzle modes enforce alignment + * of the number of slices to the block depth. + * If numSlices = 3 then the 3 slices are + * interleaved for 3D locality among the 8 slices + * that make up each block. This causes the memory + * footprint to jump from an ideal size to 3x the size. + * 'enable3DSwizzleMode' flag tests for env variable + * HSA_IMAGE_ENABLE_3D_SWIZZLE_DEBUG to enable or disable + * 3D swizzle: + * true: Keep view3dAs2dArray = 0 for real 3D interleaving. + * false: Use view3dAs2dArray = 1 to avoid the alignment + * expansion. + * 2D swizzle modes can lower size overhead but may yield + * suboptimal cache behavior for fully 3D volumetric + * operations. + */ + bool enable3DSwizzleMode = core::Runtime::runtime_singleton_->flag().enable_3d_swizzle(); + if (enable3DSwizzleMode) + in.flags.view3dAs2dArray = 0; + else + in.flags.view3dAs2dArray = 1; + + break; + } } in.flags.texture = 1;