diff --git a/CMakeLists.txt b/CMakeLists.txt index c46a9a7bc5..d9652d2728 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,10 +44,6 @@ message("Package version: ${VERSION_STRING}") # which requires the gRPC option(BUILD_STANDALONE "Build targets for rdci and rdcd" ON) -# When cmake -DBUILD_RASLIB=off, it will not build the raslib -# which requires the ROCT-Thunk-Interface. -option(BUILD_RASLIB "Build targets for raslib" OFF) - # When cmake -DBUILD_RUNTIME=off, it will not build the librdc_rocr.so # which requires the Rocm run time. option(BUILD_RUNTIME "Build targets for librdc_rocr.so" ON) @@ -149,12 +145,6 @@ if(BUILD_STANDALONE AND GRPC_ROOT STREQUAL GRPC_ROOT_DEFAULT) Continuing without gRPC install") endif() -if(NOT EXISTS "${CMAKE_SOURCE_DIR}/raslib/.git" AND BUILD_RASLIB) - message(FATAL_ERROR "The git submodule raslib is not available. Please run - git submodule update --init --recursive - If you do not want to build raslib, use cmake -DBUILD_RASLIB=off") -endif() - find_package(SMI NAMES amd_smi HINTS ${ROCM_DIR}/lib/cmake @@ -167,12 +157,6 @@ if(NOT EXISTS "${SMI_INC_DIR}" OR NOT EXISTS "${SMI_LIB_DIR}") make sure amd_smi is installed and present in ${SMI_INC_DIR}.") endif() -if(BUILD_RASLIB AND NOT DEFINED HSA_DIR) - message(FATAL_ERROR "Please specify libhsakmt directory which is required by raslib - cmake -DHSA_DIR= - If you do not want to build raslib, use cmake -DBUILD_RASLIB=off") -endif() - set(${RDC}_VERSION_MAJOR "${VERSION_MAJOR}") set(${RDC}_VERSION_MINOR "${VERSION_MINOR}") set(${RDC}_VERSION_PATCH "0") @@ -315,20 +299,6 @@ if(BUILD_STANDALONE) endif() endif() -# Raslib -if(BUILD_RASLIB) - message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&") - message(" Build raslib") - message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&") - - add_subdirectory("raslib") -else() - add_library(rdc_ras INTERFACE - ${PROJECT_SOURCE_DIR}/ras_prebuild/librdc_ras.so) - # needed for copying the pre-built library - get_target_property(RAS_SOURCES rdc_ras SOURCES) -endif() - # Folders for both standalone and embedded add_subdirectory("rdc_libs") @@ -377,24 +347,6 @@ install(DIRECTORY ${PROJECT_SOURCE_DIR}/example DESTINATION ${RDC_SHARE_INSTALL_PREFIX} COMPONENT dev) -# Prebuild packages to install -install(PROGRAMS ${RAS_SOURCES} - DESTINATION ${CMAKE_INSTALL_LIBDIR}/${RDC} - COMPONENT ${CLIENT_COMPONENT}) -install(DIRECTORY ${PROJECT_SOURCE_DIR}/ras_prebuild/config - DESTINATION ${CMAKE_INSTALL_DATADIR}/${RDC} - COMPONENT ${CLIENT_COMPONENT}) -# Don't sp3 grpc install because it floods the terminal -set(OLD_CMAKE_INSTALL_MESSAGE ${CMAKE_INSTALL_MESSAGE}) -set(CMAKE_INSTALL_MESSAGE NEVER) -install(DIRECTORY ${PROJECT_SOURCE_DIR}/ras_prebuild/sp3 - DESTINATION ${CMAKE_INSTALL_LIBDIR}/${RDC} - COMPONENT ${CLIENT_COMPONENT}) -# Restore printing verbosity -set(CMAKE_INSTALL_MESSAGE ${OLD_CMAKE_INSTALL_MESSAGE}) -unset(OLD_CMAKE_INSTALL_MESSAGE) - - #Identify between SLES and Centos for setting symlink for rdc.service #SLES need serice file in /usr/lib/systemd/system/rdc.service #CENTOS/RHEL Require file in /lib/systemd/system/rdc.service diff --git a/cmake_modules/rdc-backward-compat.cmake b/cmake_modules/rdc-backward-compat.cmake index ed3fb50484..f0af2f00ba 100644 --- a/cmake_modules/rdc-backward-compat.cmake +++ b/cmake_modules/rdc-backward-compat.cmake @@ -59,13 +59,11 @@ function(create_library_symlink) # Symlink for private libraries set(LIB_RDC_ROCR "librdc_rocr.so") set(LIB_RDC_ROCP "librdc_rocp.so") - set(LIB_RDC_RAS "librdc_ras.so") set(LIB_RDC_RVS "librdc_rvs.so") set(LIB_RDC_CLIENT_SMI "librdc_client_smi.so") set(library_files "${LIB_RDC_ROCR}" "${LIB_RDC_ROCR}.${MAJ_VERSION}" "${LIB_RDC_ROCR}.${SO_VERSION}" ) set(library_files "${LIB_RDC_ROCP}" "${LIB_RDC_ROCP}.${MAJ_VERSION}" "${LIB_RDC_ROCP}.${SO_VERSION}" ) set(library_files "${library_files}" "${LIB_RDC_CLIENT_SMI}" "${LIB_RDC_CLIENT_SMI}.${MAJ_VERSION}" "${LIB_RDC_CLIENT_SMI}.${SO_VERSION}" ) - set(library_files "${library_files}" "${LIB_RDC_RAS}") set(library_files "${library_files}" "${LIB_RDC_RVS}") foreach(file_name ${library_files}) diff --git a/common/rdc_field.data b/common/rdc_field.data index a74e582353..d3de98439e 100644 --- a/common/rdc_field.data +++ b/common/rdc_field.data @@ -43,41 +43,57 @@ FLD_DESC_ENT(RDC_FI_GPU_TEMP, "GPU temperature in millidegrees Celsiu FLD_DESC_ENT(RDC_FI_POWER_USAGE, "Power usage in microwatts", "POWER_USAGE", true) FLD_DESC_ENT(RDC_FI_PCIE_TX, "PCIe Tx utilization in bytes/second", "PCIE_TX", true) FLD_DESC_ENT(RDC_FI_PCIE_RX, "PCIe Rx utilization in bytes/second", "PCIE_RX", true) -FLD_DESC_ENT(RDC_FI_PCIE_BANDWIDTH, "PCIe bandwidth in GB/sec", "PCIE_BANDWIDTH", true) +FLD_DESC_ENT(RDC_FI_PCIE_BANDWIDTH, "PCIe bandwidth in GB/sec", "PCIE_BANDWIDTH", true) FLD_DESC_ENT(RDC_FI_GPU_UTIL, "GPU busy percentage", "GPU_UTIL", true) FLD_DESC_ENT(RDC_FI_GPU_MEMORY_USAGE, "Memory usage of the GPU instance in bytes", "GPU_MEMORY_USAGE", true) FLD_DESC_ENT(RDC_FI_GPU_MEMORY_TOTAL, "Total memory of the GPU instance", "GPU_MEMORY_TOTAL", true) + +// ECC totals FLD_DESC_ENT(RDC_FI_ECC_CORRECT_TOTAL, "Accumulated Single Error Correction", "ECC_CORRECT", true) FLD_DESC_ENT(RDC_FI_ECC_UNCORRECT_TOTAL, "Accumulated Double Error Detection", "ECC_UNCORRECT", true) -FLD_DESC_ENT(RDC_FI_ECC_SDMA_SEC, "SDMA Single Error Correction", "ECC_SDMA_SEC", true) -FLD_DESC_ENT(RDC_FI_ECC_SDMA_DED, "SDMA Double Error Detection", "ECC_SDMA_DED", true) -FLD_DESC_ENT(RDC_FI_ECC_GFX_SEC, "GFX Single Error Correction", "ECC_GFX_SEC", true) -FLD_DESC_ENT(RDC_FI_ECC_GFX_DED, "GFX Double Error Detection", "ECC_GFX_DED", true) -FLD_DESC_ENT(RDC_FI_ECC_MMHUB_SEC, "MMHUB Single Error Correction", "ECC_MMHUB_SEC", true) -FLD_DESC_ENT(RDC_FI_ECC_MMHUB_DED, "MMHUB Double Error Detection", "ECC_MMHUB_DED", true) -FLD_DESC_ENT(RDC_FI_ECC_ATHUB_SEC, "ATHUB Single Error Correction", "ECC_ATHUB_SEC", true) -FLD_DESC_ENT(RDC_FI_ECC_ATHUB_DED, "ATHUB Double Error Detection", "ECC_ATHUB_DED", true) -FLD_DESC_ENT(RDC_FI_ECC_BIF_SEC, "BIF Single Error Correction", "ECC_BIF_SEC", true) -FLD_DESC_ENT(RDC_FI_ECC_BIF_DED, "BIF Double Error Detection", "ECC_BIF_DED", true) -FLD_DESC_ENT(RDC_FI_ECC_HDP_SEC, "HDP Single Error Correction", "ECC_HDP_SEC", true) -FLD_DESC_ENT(RDC_FI_ECC_HDP_DED, "HDP Double Error Detection", "ECC_HDP_DED", true) -FLD_DESC_ENT(RDC_FI_ECC_XGMI_WAFL_SEC, "XGMI WAFL Single Error Correction", "ECC_XGMI_WAFL_SEC",true) -FLD_DESC_ENT(RDC_FI_ECC_XGMI_WAFL_DED, "XGMI WAFL Double Error Detection", "ECC_XGMI_WAFL_DED",true) -FLD_DESC_ENT(RDC_FI_ECC_DF_SEC, "DF Single Error Correction", "ECC_DF_SEC", true) -FLD_DESC_ENT(RDC_FI_ECC_DF_DED, "DF Double Error Detection", "ECC_DF_DED", true) -FLD_DESC_ENT(RDC_FI_ECC_SMN_SEC, "SMN Single Error Correction", "ECC_SMN_SEC", true) -FLD_DESC_ENT(RDC_FI_ECC_SMN_DED, "SMN Double Error Detection", "ECC_SMN_DED", true) -FLD_DESC_ENT(RDC_FI_ECC_SEM_SEC, "SEM Single Error Correction", "ECC_SEM_SEC", true) -FLD_DESC_ENT(RDC_FI_ECC_SEM_DED, "SEM Double Error Detection", "ECC_SEM_DED", true) -FLD_DESC_ENT(RDC_FI_ECC_MP0_SEC, "MP0 Single Error Correction", "ECC_MP0_SEC", true) -FLD_DESC_ENT(RDC_FI_ECC_MP0_DED, "MP0 Double Error Detection", "ECC_MP0_DED", true) -FLD_DESC_ENT(RDC_FI_ECC_MP1_SEC, "MP1 Single Error Correction", "ECC_MP1_SEC", true) -FLD_DESC_ENT(RDC_FI_ECC_MP1_DED, "MP1 Double Error Detection", "ECC_MP1_DED", true) -FLD_DESC_ENT(RDC_FI_ECC_FUSE_SEC, "FUSE Single Error Correction", "ECC_FUSE_SEC", true) -FLD_DESC_ENT(RDC_FI_ECC_FUSE_DED, "FUSE Double Error Detection", "ECC_FUSE_DED", true) -FLD_DESC_ENT(RDC_FI_ECC_UMC_SEC, "UMC Single Error Correction", "ECC_UMC_SEC", true) -FLD_DESC_ENT(RDC_FI_ECC_UMC_DED, "UMC Double Error Detection", "ECC_UMC_DED", true) + +// ECC blocks +FLD_DESC_ENT(RDC_FI_ECC_SDMA_CE, "SDMA Correctable Error", "ECC_SDMA_CE", true) +FLD_DESC_ENT(RDC_FI_ECC_SDMA_UE, "SDMA Uncorrectable Error", "ECC_SDMA_UE", true) +FLD_DESC_ENT(RDC_FI_ECC_GFX_CE, "GFX Correctable Error", "ECC_GFX_CE", true) +FLD_DESC_ENT(RDC_FI_ECC_GFX_UE, "GFX Uncorrectable Error", "ECC_GFX_UE", true) +FLD_DESC_ENT(RDC_FI_ECC_MMHUB_CE, "MMHUB Correctable Error", "ECC_MMHUB_CE", true) +FLD_DESC_ENT(RDC_FI_ECC_MMHUB_UE, "MMHUB Uncorrectable Error", "ECC_MMHUB_UE", true) +FLD_DESC_ENT(RDC_FI_ECC_ATHUB_CE, "ATHUB Correctable Error", "ECC_ATHUB_CE", true) +FLD_DESC_ENT(RDC_FI_ECC_ATHUB_UE, "ATHUB Uncorrectable Error", "ECC_ATHUB_UE", true) +FLD_DESC_ENT(RDC_FI_ECC_PCIE_BIF_CE, "PCIE_BIF Correctable Error", "ECC_PCIE_BIF_CE", true) +FLD_DESC_ENT(RDC_FI_ECC_PCIE_BIF_UE, "PCIE_BIF Uncorrectable Error", "ECC_PCIE_BIF_UE", true) +FLD_DESC_ENT(RDC_FI_ECC_HDP_CE, "HDP Correctable Error", "ECC_HDP_CE", true) +FLD_DESC_ENT(RDC_FI_ECC_HDP_UE, "HDP Uncorrectable Error", "ECC_HDP_UE", true) +FLD_DESC_ENT(RDC_FI_ECC_XGMI_WAFL_CE, "XGMI_WAFL Correctable Error", "ECC_XGMI_WAFL_CE", true) +FLD_DESC_ENT(RDC_FI_ECC_XGMI_WAFL_UE, "XGMI_WAFL Uncorrectable Error", "ECC_XGMI_WAFL_UE", true) +FLD_DESC_ENT(RDC_FI_ECC_DF_CE, "DF Correctable Error", "ECC_DF_CE", true) +FLD_DESC_ENT(RDC_FI_ECC_DF_UE, "DF Uncorrectable Error", "ECC_DF_UE", true) +FLD_DESC_ENT(RDC_FI_ECC_SMN_CE, "SMN Correctable Error", "ECC_SMN_CE", true) +FLD_DESC_ENT(RDC_FI_ECC_SMN_UE, "SMN Uncorrectable Error", "ECC_SMN_UE", true) +FLD_DESC_ENT(RDC_FI_ECC_SEM_CE, "SEM Correctable Error", "ECC_SEM_CE", true) +FLD_DESC_ENT(RDC_FI_ECC_SEM_UE, "SEM Uncorrectable Error", "ECC_SEM_UE", true) +FLD_DESC_ENT(RDC_FI_ECC_MP0_CE, "MP0 Correctable Error", "ECC_MP0_CE", true) +FLD_DESC_ENT(RDC_FI_ECC_MP0_UE, "MP0 Uncorrectable Error", "ECC_MP0_UE", true) +FLD_DESC_ENT(RDC_FI_ECC_MP1_CE, "MP1 Correctable Error", "ECC_MP1_CE", true) +FLD_DESC_ENT(RDC_FI_ECC_MP1_UE, "MP1 Uncorrectable Error", "ECC_MP1_UE", true) +FLD_DESC_ENT(RDC_FI_ECC_FUSE_CE, "FUSE Correctable Error", "ECC_FUSE_CE", true) +FLD_DESC_ENT(RDC_FI_ECC_FUSE_UE, "FUSE Uncorrectable Error", "ECC_FUSE_UE", true) +FLD_DESC_ENT(RDC_FI_ECC_UMC_CE, "UMC Correctable Error", "ECC_UMC_CE", true) +FLD_DESC_ENT(RDC_FI_ECC_UMC_UE, "UMC Uncorrectable Error", "ECC_UMC_UE", true) +FLD_DESC_ENT(RDC_FI_ECC_MCA_CE, "MCA Correctable Error", "ECC_MCA_CE", true) +FLD_DESC_ENT(RDC_FI_ECC_MCA_UE, "MCA Uncorrectable Error", "ECC_MCA_UE", true) +FLD_DESC_ENT(RDC_FI_ECC_VCN_CE, "VCN Correctable Error", "ECC_VCN_CE", true) +FLD_DESC_ENT(RDC_FI_ECC_VCN_UE, "VCN Uncorrectable Error", "ECC_VCN_UE", true) +FLD_DESC_ENT(RDC_FI_ECC_JPEG_CE, "JPEG Correctable Error", "ECC_JPEG_CE", true) +FLD_DESC_ENT(RDC_FI_ECC_JPEG_UE, "JPEG Uncorrectable Error", "ECC_JPEG_UE", true) +FLD_DESC_ENT(RDC_FI_ECC_IH_CE, "IH Correctable Error", "ECC_IH_CE", true) +FLD_DESC_ENT(RDC_FI_ECC_IH_UE, "IH Uncorrectable Error", "ECC_IH_UE", true) +FLD_DESC_ENT(RDC_FI_ECC_MPIO_CE, "MPIO Correctable Error", "ECC_MPIO_CE", true) +FLD_DESC_ENT(RDC_FI_ECC_MPIO_UE, "MPIO Uncorrectable Error", "ECC_MPIO_UE", true) + +// XGMI FLD_DESC_ENT(RDC_FI_XGMI_0_READ_KB, "XGMI0 accumulated data read size (KB)", "XGMI_0_READ", true) FLD_DESC_ENT(RDC_FI_XGMI_1_READ_KB, "XGMI1 accumulated data read size (KB)", "XGMI_1_READ", true) FLD_DESC_ENT(RDC_FI_XGMI_2_READ_KB, "XGMI2 accumulated data read size (KB)", "XGMI_2_READ", true) diff --git a/include/rdc/rdc.h b/include/rdc/rdc.h index d6ee5fc12b..9919ee4d35 100644 --- a/include/rdc/rdc.h +++ b/include/rdc/rdc.h @@ -185,47 +185,46 @@ typedef enum { RDC_FI_ECC_CORRECT_TOTAL = 600, //!< Accumulated correctable ECC errors RDC_FI_ECC_UNCORRECT_TOTAL, //!< Accumulated uncorrectable ECC errors - RDC_FI_ECC_SDMA_SEC, //!< SDMA Single Error Correction - RDC_FI_ECC_SDMA_DED, //!< SDMA Double Error Detection - - RDC_FI_ECC_GFX_SEC, //!< GFX Single Error Correction - RDC_FI_ECC_GFX_DED, //!< GFX Double Error Detection - - RDC_FI_ECC_MMHUB_SEC, //!< MMHUB Single Error Correction - RDC_FI_ECC_MMHUB_DED, //!< MMHUB Double Error Detection - - RDC_FI_ECC_ATHUB_SEC, //!< ATHUB Single Error Correction - RDC_FI_ECC_ATHUB_DED, //!< ATHUB Double Error Detection - - RDC_FI_ECC_BIF_SEC, //!< BIF Single Error Correction - RDC_FI_ECC_BIF_DED, //!< BIF Double Error Detection - - RDC_FI_ECC_HDP_SEC, //!< HDP Single Error Correction - RDC_FI_ECC_HDP_DED, //!< HDP Double Error Detection - - RDC_FI_ECC_XGMI_WAFL_SEC, //!< XGMI WAFL Single Error Correction - RDC_FI_ECC_XGMI_WAFL_DED, //!< XGMI WAFL Double Error Detection - - RDC_FI_ECC_DF_SEC, //!< DF Single Error Correction - RDC_FI_ECC_DF_DED, //!< DF Double Error Detection - - RDC_FI_ECC_SMN_SEC, //!< SMN Single Error Correction - RDC_FI_ECC_SMN_DED, //!< SMN Double Error Detection - - RDC_FI_ECC_SEM_SEC, //!< SEM Single Error Correction - RDC_FI_ECC_SEM_DED, //!< SEM Double Error Detection - - RDC_FI_ECC_MP0_SEC, //!< MP0 Single Error Correction - RDC_FI_ECC_MP0_DED, //!< MP0 Double Error Detection - - RDC_FI_ECC_MP1_SEC, //!< MP1 Single Error Correction - RDC_FI_ECC_MP1_DED, //!< MP1 Double Error Detection - - RDC_FI_ECC_FUSE_SEC, //!< FUSE Single Error Correction - RDC_FI_ECC_FUSE_DED, //!< FUSE Double Error Detection - - RDC_FI_ECC_UMC_SEC, //!< UMC Single Error Correction - RDC_FI_ECC_UMC_DED, //!< UMC Double Error Detection + RDC_FI_ECC_FIRST = 602, //!< FIRST Error Correction and Detection field + RDC_FI_ECC_SDMA_CE = RDC_FI_ECC_FIRST, + RDC_FI_ECC_SDMA_UE, + RDC_FI_ECC_GFX_CE, + RDC_FI_ECC_GFX_UE, + RDC_FI_ECC_MMHUB_CE, + RDC_FI_ECC_MMHUB_UE, + RDC_FI_ECC_ATHUB_CE, + RDC_FI_ECC_ATHUB_UE, + RDC_FI_ECC_PCIE_BIF_CE, + RDC_FI_ECC_PCIE_BIF_UE, + RDC_FI_ECC_HDP_CE, + RDC_FI_ECC_HDP_UE, + RDC_FI_ECC_XGMI_WAFL_CE, + RDC_FI_ECC_XGMI_WAFL_UE, + RDC_FI_ECC_DF_CE, + RDC_FI_ECC_DF_UE, + RDC_FI_ECC_SMN_CE, + RDC_FI_ECC_SMN_UE, + RDC_FI_ECC_SEM_CE, + RDC_FI_ECC_SEM_UE, + RDC_FI_ECC_MP0_CE, + RDC_FI_ECC_MP0_UE, + RDC_FI_ECC_MP1_CE, + RDC_FI_ECC_MP1_UE, + RDC_FI_ECC_FUSE_CE, + RDC_FI_ECC_FUSE_UE, + RDC_FI_ECC_UMC_CE, + RDC_FI_ECC_UMC_UE, + RDC_FI_ECC_MCA_CE, + RDC_FI_ECC_MCA_UE, + RDC_FI_ECC_VCN_CE, + RDC_FI_ECC_VCN_UE, + RDC_FI_ECC_JPEG_CE, + RDC_FI_ECC_JPEG_UE, + RDC_FI_ECC_IH_CE, + RDC_FI_ECC_IH_UE, + RDC_FI_ECC_MPIO_CE, + RDC_FI_ECC_MPIO_UE, + RDC_FI_ECC_LAST = RDC_FI_ECC_MPIO_UE, // In new ASCI, such as MI300, the XGMI events is not supported // Using below XGMI related fields to calculate the bandwidth. @@ -320,6 +319,13 @@ typedef enum { RDC_EVNT_NOTIF_LAST = RDC_EVNT_NOTIF_RING_HANG, } rdc_field_t; + +// even and odd numbers are used for correctable and uncorrectable errors +static_assert(RDC_FI_ECC_SDMA_CE % 2 == 0, "Correctable Error enum is not even"); +static_assert(RDC_FI_ECC_SDMA_UE % 2 == 1, "Uncorrectable Error enum is not odd"); +static_assert(RDC_FI_ECC_MPIO_CE % 2 == 0, "Correctable Error enum is not even"); +static_assert(RDC_FI_ECC_MPIO_UE % 2 == 1, "Uncorrectable Error enum is not odd"); + #define RDC_EVNT_IS_NOTIF_FIELD(FIELD) \ ((FIELD) >= RDC_EVNT_NOTIF_FIRST && (FIELD) <= RDC_EVNT_NOTIF_LAST) /** diff --git a/include/rdc_lib/RdcDiagnosticLibInterface.h b/include/rdc_lib/RdcDiagnosticLibInterface.h index 9088cce673..09ac8c8ac9 100644 --- a/include/rdc_lib/RdcDiagnosticLibInterface.h +++ b/include/rdc_lib/RdcDiagnosticLibInterface.h @@ -22,7 +22,7 @@ THE SOFTWARE. #ifndef INCLUDE_RDC_LIB_RDCDIAGNOSTICLIBINTERFACE_H_ #define INCLUDE_RDC_LIB_RDCDIAGNOSTICLIBINTERFACE_H_ -// The telemetry interface for libraries, for example, RAS. +// The telemetry interface for libraries, for example, AMD-SMI. #include extern "C" { diff --git a/include/rdc_lib/RdcTelemetryLibInterface.h b/include/rdc_lib/RdcTelemetryLibInterface.h index e50e8e63cd..843f7371c4 100644 --- a/include/rdc_lib/RdcTelemetryLibInterface.h +++ b/include/rdc_lib/RdcTelemetryLibInterface.h @@ -22,7 +22,7 @@ THE SOFTWARE. #ifndef INCLUDE_RDC_LIB_RDCTELEMETRYLIBINTERFACE_H_ #define INCLUDE_RDC_LIB_RDCTELEMETRYLIBINTERFACE_H_ -// The telemetry interface for libraries, for example, RAS. +// The telemetry interface for libraries, for example, AMD-SMI. #include #include diff --git a/include/rdc_lib/impl/RdcMetricFetcherImpl.h b/include/rdc_lib/impl/RdcMetricFetcherImpl.h index ac8e6ff73e..63745a66df 100644 --- a/include/rdc_lib/impl/RdcMetricFetcherImpl.h +++ b/include/rdc_lib/impl/RdcMetricFetcherImpl.h @@ -84,7 +84,8 @@ class RdcMetricFetcherImpl final : public RdcMetricFetcher { std::shared_ptr get_smi_data(RdcFieldKey key); uint64_t now(); - void get_ecc_error(uint32_t gpu_index, rdc_field_t field_id, rdc_field_value* value); + void get_ecc(uint32_t gpu_index, rdc_field_t field_id, rdc_field_value* value); + void get_ecc_total(uint32_t gpu_index, rdc_field_t field_id, rdc_field_value* value); //!< return true if starting async_get bool async_get_pcie_throughput(uint32_t gpu_index, rdc_field_t field_id, rdc_field_value* value); diff --git a/include/rdc_lib/impl/RdcRasLib.h b/include/rdc_lib/impl/RdcRasLib.h deleted file mode 100644 index 329e4ba0ec..0000000000 --- a/include/rdc_lib/impl/RdcRasLib.h +++ /dev/null @@ -1,89 +0,0 @@ -/* -Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ -#ifndef INCLUDE_RDC_LIB_IMPL_RDCRASLIB_H_ -#define INCLUDE_RDC_LIB_IMPL_RDCRASLIB_H_ - -#include -#include -#include -#include -#include -#include - -#include "rdc_lib/RdcDiagnostic.h" -#include "rdc_lib/RdcLibraryLoader.h" -#include "rdc_lib/RdcTelemetry.h" - -namespace amd { -namespace rdc { -class RdcRasLib : public RdcTelemetry, public RdcDiagnostic { - public: - // get support field ids - rdc_status_t rdc_telemetry_fields_query(uint32_t field_ids[MAX_NUM_FIELDS], - uint32_t* field_count) override; - - // Fetch - rdc_status_t rdc_telemetry_fields_value_get(rdc_gpu_field_t* fields, uint32_t fields_count, - rdc_field_value_f callback, void* user_data) override; - - rdc_status_t rdc_telemetry_fields_watch(rdc_gpu_field_t* fields, uint32_t fields_count) override; - - rdc_status_t rdc_telemetry_fields_unwatch(rdc_gpu_field_t* fields, - uint32_t fields_count) override; - - rdc_status_t rdc_diag_test_cases_query(rdc_diag_test_cases_t test_cases[MAX_TEST_CASES], - uint32_t* test_case_count) override; - - // Run a specific test case - rdc_status_t rdc_test_case_run(rdc_diag_test_cases_t test_case, - uint32_t gpu_index[RDC_MAX_NUM_DEVICES], uint32_t gpu_count, - const char* config, size_t config_size, - rdc_diag_test_result_t* result) override; - - rdc_status_t rdc_diagnostic_run(const rdc_group_info_t& gpus, rdc_diag_level_t level, - const char* config, size_t config_size, - rdc_diag_response_t* response) override; - - rdc_status_t rdc_diag_init(uint64_t flags) override; - rdc_status_t rdc_diag_destroy() override; - - RdcRasLib(); - - ~RdcRasLib(); - - private: - RdcLibraryLoader lib_loader_; - rdc_status_t (*fields_value_get_)(rdc_gpu_field_t*, uint32_t, rdc_field_value_f, void*); - rdc_status_t (*fields_query_)(uint32_t[MAX_NUM_FIELDS], uint32_t*); - - rdc_status_t (*fields_watch_)(rdc_gpu_field_t*, uint32_t); - rdc_status_t (*fields_unwatch_)(rdc_gpu_field_t*, uint32_t); - - rdc_status_t (*rdc_module_init_)(uint64_t); - rdc_status_t (*rdc_module_destroy_)(); -}; -typedef std::shared_ptr RdcRasLibPtr; - -} // namespace rdc -} // namespace amd - -#endif // INCLUDE_RDC_LIB_IMPL_RDCRASLIB_H_ diff --git a/include/rdc_lib/impl/RdcTelemetryModule.h b/include/rdc_lib/impl/RdcTelemetryModule.h index cae45af4c0..19e39ed28d 100644 --- a/include/rdc_lib/impl/RdcTelemetryModule.h +++ b/include/rdc_lib/impl/RdcTelemetryModule.h @@ -29,7 +29,6 @@ THE SOFTWARE. #include "rdc_lib/RdcMetricFetcher.h" #include "rdc_lib/RdcTelemetry.h" -#include "rdc_lib/impl/RdcRasLib.h" #include "rdc_lib/impl/RdcSmiLib.h" namespace amd { diff --git a/ras_prebuild/config/arcturus.json b/ras_prebuild/config/arcturus.json deleted file mode 100644 index cc4cc78d94..0000000000 --- a/ras_prebuild/config/arcturus.json +++ /dev/null @@ -1,5680 +0,0 @@ -{ - "version": "0.0.1", - "type": { - "parity": 1, - "single_correctable": 2, - "multi_uncorrectable": 4, - "poison": 8 - }, - "block": { - "umc": { - "index": 0, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "sdma": { - "index": 1, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx": { - "index": 2, - "support": 1, - "subblock": { - "gfx_cpc_scratch": { - "index": 0, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_cpc_ucode": { - "index": 1, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_cpc_me1_dc_state": { - "index": 2, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_cpc_me1_dc_csinvoc": { - "index": 3, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_cpc_me1_dc_restore": { - "index": 4, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_cpc_me1_dc_csinvoc1": { - "index": 5, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_cpc_me2_dc_state": { - "index": 6, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_cpc_me2_dc_csinvoc": { - "index": 7, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_cpc_me2_dc_restore": { - "index": 8, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_cpf_me2_roq": { - "index": 9, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_cpf_me1_roq": { - "index": 10, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_cpf_tciu_tag": { - "index": 11, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_gds_mem": { - "index": 12, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_gds_input_queue": { - "index": 13, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_gds_oa_phy_cmd_ram_mem": { - "index": 14, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_gds_oa_phy_data_ram_mem": { - "index": 15, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_gds_oa_pipe_mem": { - "index": 16, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_spi_sr_mem": { - "index": 17, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_spi_gds_expreq_mem": { - "index": 18, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_spi_wb_grant_30_mem": { - "index": 19, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_spi_wb_grant_61_mem": { - "index": 20, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_spi_life_cnt_mem": { - "index": 21, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sq_sgpr": { - "index": 22, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sq_lds_d": { - "index": 23, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sq_lds_i": { - "index": 24, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sq_vgpr": { - "index": 25, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sqc_inst_utcl1_lfifo": { - "index": 26, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sqc_data_cu0_write_data_buf": { - "index": 27, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sqc_data_cu0_utcl1_lfifo": { - "index": 28, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sqc_data_cu1_write_data_buf": { - "index": 29, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sqc_data_cu1_utcl1_lfifo": { - "index": 30, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sqc_data_cu2_write_data_buf": { - "index": 31, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sqc_data_cu2_utcl1_lfifo": { - "index": 32, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sqc_banka_inst_tag_ram": { - "index": 33, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sqc_banka_inst_utcl1_miss_fifo": { - "index": 34, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sqc_banka_inst_miss_fifo": { - "index": 35, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sqc_banka_inst_bank_ram": { - "index": 36, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sqc_banka_data_tag_ram": { - "index": 37, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sqc_banka_data_hit_fifo": { - "index": 38, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sqc_banka_data_miss_fifo": { - "index": 39, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sqc_banka_data_dirty_bit_ram": { - "index": 40, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sqc_banka_data_bank_ram": { - "index": 41, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sqc_bankb_inst_tag_ram": { - "index": 42, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sqc_bankb_inst_utcl1_miss_fifo": { - "index": 43, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sqc_bankb_inst_miss_fifo": { - "index": 44, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sqc_bankb_inst_bank_ram": { - "index": 45, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sqc_bankb_data_tag_ram": { - "index": 46, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sqc_bankb_data_hit_fifo": { - "index": 47, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sqc_bankb_data_miss_fifo": { - "index": 48, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sqc_bankb_data_dirty_bit_ram": { - "index": 49, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sqc_bankb_data_bank_ram": { - "index": 50, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_ta_fs_dfifo": { - "index": 51, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_ta_fs_afifo": { - "index": 52, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_ta_fl_lfifo": { - "index": 53, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_ta_fx_lfifo": { - "index": 54, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_ta_fs_cfifo": { - "index": 55, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_tca_hole_fifo": { - "index": 56, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_tca_req_fifo": { - "index": 57, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_tcc_cache_data": { - "index": 58, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_tcc_cache_data_bank_0_1": { - "index": 59, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_tcc_cache_data_bank_1_0": { - "index": 60, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_tcc_cache_data_bank_1_1": { - "index": 61, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_tcc_cache_dirty_bank_0": { - "index": 62, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_tcc_cache_dirty_bank_1": { - "index": 63, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_tcc_high_rate_tag": { - "index": 64, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_tcc_low_rate_tag": { - "index": 65, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_tcc_in_use_dec": { - "index": 66, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_tcc_in_use_transfer": { - "index": 67, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_tcc_return_data": { - "index": 68, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_tcc_return_control": { - "index": 69, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_tcc_uc_atomic_fifo": { - "index": 70, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_tcc_write_return": { - "index": 71, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_tcc_write_cache_read": { - "index": 72, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_tcc_src_fifo": { - "index": 73, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_tcc_cache_tag_probe_fifo": { - "index": 74, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_tcc_latency_fifo": { - "index": 75, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_tcc_latency_fifo_next_ram": { - "index": 76, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_tci_write_ram": { - "index": 77, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_tcp_cache_ram": { - "index": 78, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_tcp_lfifo_ram": { - "index": 79, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_tcp_cmd_fifo": { - "index": 80, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_tcp_vm_fifo": { - "index": 81, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_tcp_db_ram": { - "index": 82, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_tcp_utcl1_lfifo0": { - "index": 83, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_tcp_utcl1_lfifo1": { - "index": 84, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_td_ss_fifo_lo": { - "index": 85, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_td_ss_fifo_hi": { - "index": 86, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_td_cs_fifo": { - "index": 87, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_ea_dramrd_cmdmem": { - "index": 88, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_ea_dramwr_cmdmem": { - "index": 89, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_ea_dramwr_datamem": { - "index": 90, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_ea_rret_tagmem": { - "index": 91, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_ea_wret_tagmem": { - "index": 92, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_ea_gmird_cmdmem": { - "index": 93, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_ea_gmiwr_cmdmem": { - "index": 94, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_ea_gmiwr_datamem": { - "index": 95, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_ea_dramrd_pagemem": { - "index": 96, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_ea_dramwr_pagemem": { - "index": 97, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_ea_iord_cmdmem": { - "index": 98, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_ea_iowr_cmdmem": { - "index": 99, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_ea_iowr_datamem": { - "index": 100, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_ea_gmird_pagemem": { - "index": 101, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_ea_gmiwr_pagemem": { - "index": 102, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_ea_mam_d0mem": { - "index": 103, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_ea_mam_d1mem": { - "index": 104, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_ea_mam_d2mem": { - "index": 105, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_ea_mam_d3mem": { - "index": 106, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_ea_mam_a0mem": { - "index": 107, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_ea_mam_a1mem": { - "index": 108, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_ea_mam_a2mem": { - "index": 109, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_ea_mam_a3mem": { - "index": 110, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_ea_mam_afmem": { - "index": 111, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "utc_vml2_bank_cache": { - "index": 112, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "utc_vml2_walker": { - "index": 113, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "utcl2_router": { - "index": 114, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "utc_atcl2_cache_2m_bank": { - "index": 115, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "utc_atcl2_cache_4k_bank": { - "index": 116, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "rlcg_instr_ram": { - "index": 117, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "rlcg_scratch_ram": { - "index": 118, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "rlcv_instr_ram": { - "index": 119, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "rlcv_scratch_ram": { - "index": 120, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "rlc_tctag_ram": { - "index": 121, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "rlc_spm_scratch_ram": { - "index": 122, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "rlc_srm_data_ram": { - "index": 123, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "rlc_srm_addr_ram": { - "index": 124, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "rlc_spm_se0_scratch_ram": { - "index": 125, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "rlc_spm_se1_scratch_ram": { - "index": 126, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "rlc_spm_se2_scratch_ram": { - "index": 127, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "rlc_spm_se3_scratch_ram": { - "index": 128, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "rlc_spm_se4_scratch_ram": { - "index": 129, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "rlc_spm_se5_scratch_ram": { - "index": 130, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "rlc_spm_se6_scratch_ram": { - "index": 131, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "rlc_spm_se7_scratch_ram": { - "index": 132, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - } - } - }, - "mmhub": { - "index": 3, - "support": 1, - "subblock": { - "mmea0_dramrd_cmdmem": { - "index": 0, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea0_dramwr_cmdmem": { - "index": 1, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea0_dramwr_datamem": { - "index": 2, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea0_rret_tagmem": { - "index": 3, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea0_wret_tagmem": { - "index": 4, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea0_gmird_cmdmem": { - "index": 5, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea0_gmiwr_cmdmem": { - "index": 6, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea0_gmiwr_datamem": { - "index": 7, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea0_dramrd_pagemem": { - "index": 8, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea0_dramwr_pagemem": { - "index": 9, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea0_iord_cmdmem": { - "index": 10, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea0_iowr_cmdmem": { - "index": 11, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea0_iowr_datamem": { - "index": 12, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea0_gmird_pagemem": { - "index": 13, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea0_gmiwr_pagemem": { - "index": 14, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea0_mam_d0mem": { - "index": 15, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea0_mam_d1mem": { - "index": 16, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea0_mam_d2mem": { - "index": 17, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea0_mam_d3mem": { - "index": 18, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea1_dramrd_cmdmem": { - "index": 19, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea1_dramwr_cmdmem": { - "index": 20, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea1_dramwr_datamem": { - "index": 21, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea1_rret_tagmem": { - "index": 22, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea1_wret_tagmem": { - "index": 23, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea1_gmird_cmdmem": { - "index": 24, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea1_gmiwr_cmdmem": { - "index": 25, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea1_gmiwr_datamem": { - "index": 26, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea1_dramrd_pagemem": { - "index": 27, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea1_dramwr_pagemem": { - "index": 28, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea1_iord_cmdmem": { - "index": 29, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea1_iowr_cmdmem": { - "index": 30, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea1_iowr_datamem": { - "index": 31, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea1_gmird_pagemem": { - "index": 32, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea1_gmiwr_pagemem": { - "index": 33, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea1_mam_d0mem": { - "index": 34, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea1_mam_d1mem": { - "index": 35, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea1_mam_d2mem": { - "index": 36, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea1_mam_d3mem": { - "index": 37, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea2_dramrd_cmdmem": { - "index": 38, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea2_dramwr_cmdmem": { - "index": 39, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea2_dramwr_datamem": { - "index": 40, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea2_rret_tagmem": { - "index": 41, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea2_wret_tagmem": { - "index": 42, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea2_gmird_cmdmem": { - "index": 43, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea2_gmiwr_cmdmem": { - "index": 44, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea2_gmiwr_datamem": { - "index": 45, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea2_dramrd_pagemem": { - "index": 46, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea2_dramwr_pagemem": { - "index": 47, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea2_iord_cmdmem": { - "index": 48, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea2_iowr_cmdmem": { - "index": 49, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea2_iowr_datamem": { - "index": 50, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea2_gmird_pagemem": { - "index": 51, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea2_gmiwr_pagemem": { - "index": 52, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea2_mam_d0mem": { - "index": 53, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea2_mam_d1mem": { - "index": 54, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea2_mam_d2mem": { - "index": 55, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea2_mam_d3mem": { - "index": 56, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea3_dramrd_cmdmem": { - "index": 57, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea3_dramwr_cmdmem": { - "index": 58, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea3_dramwr_datamem": { - "index": 59, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea3_rret_tagmem": { - "index": 60, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea3_wret_tagmem": { - "index": 61, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea3_gmird_cmdmem": { - "index": 62, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea3_gmiwr_cmdmem": { - "index": 63, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea3_gmiwr_datamem": { - "index": 64, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea3_dramrd_pagemem": { - "index": 65, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea3_dramwr_pagemem": { - "index": 66, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea3_iord_cmdmem": { - "index": 67, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea3_iowr_cmdmem": { - "index": 68, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea3_iowr_datamem": { - "index": 69, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea3_gmird_pagemem": { - "index": 70, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea3_gmiwr_pagemem": { - "index": 71, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea3_mam_d0mem": { - "index": 72, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea3_mam_d1mem": { - "index": 73, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea3_mam_d2mem": { - "index": 74, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea3_mam_d3mem": { - "index": 75, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea4_dramrd_cmdmem": { - "index": 76, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea4_dramwr_cmdmem": { - "index": 77, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea4_dramwr_datamem": { - "index": 78, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea4_rret_tagmem": { - "index": 79, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea4_wret_tagmem": { - "index": 80, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea4_gmird_cmdmem": { - "index": 81, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea4_gmiwr_cmdmem": { - "index": 82, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea4_gmiwr_datamem": { - "index": 83, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea4_dramrd_pagemem": { - "index": 84, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea4_dramwr_pagemem": { - "index": 85, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea4_iord_cmdmem": { - "index": 86, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea4_iowr_cmdmem": { - "index": 87, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea4_iowr_datamem": { - "index": 88, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea4_gmird_pagemem": { - "index": 89, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea4_gmiwr_pagemem": { - "index": 90, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea4_mam_d0mem": { - "index": 91, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea4_mam_d1mem": { - "index": 92, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea4_mam_d2mem": { - "index": 93, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea4_mam_d3mem": { - "index": 94, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea5_dramrd_cmdmem": { - "index": 95, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea5_dramwr_cmdmem": { - "index": 96, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea5_dramwr_datamem": { - "index": 97, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea5_rret_tagmem": { - "index": 98, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea5_wret_tagmem": { - "index": 99, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea5_gmird_cmdmem": { - "index": 100, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea5_gmiwr_cmdmem": { - "index": 101, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea5_gmiwr_datamem": { - "index": 102, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea5_dramrd_pagemem": { - "index": 103, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea5_dramwr_pagemem": { - "index": 104, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea5_iord_cmdmem": { - "index": 105, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea5_iowr_cmdmem": { - "index": 106, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea5_iowr_datamem": { - "index": 107, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea5_gmird_pagemem": { - "index": 108, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea5_gmiwr_pagemem": { - "index": 109, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea5_mam_d0mem": { - "index": 110, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea5_mam_d1mem": { - "index": 111, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea5_mam_d2mem": { - "index": 112, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea5_mam_d3mem": { - "index": 113, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea6_dramrd_cmdmem": { - "index": 114, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea6_dramwr_cmdmem": { - "index": 115, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea6_dramwr_datamem": { - "index": 116, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea6_rret_tagmem": { - "index": 117, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea6_wret_tagmem": { - "index": 118, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea6_gmird_cmdmem": { - "index": 119, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea6_gmiwr_cmdmem": { - "index": 120, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea6_gmiwr_datamem": { - "index": 121, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea6_dramrd_pagemem": { - "index": 122, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea6_dramwr_pagemem": { - "index": 123, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea6_iord_cmdmem": { - "index": 124, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea6_iowr_cmdmem": { - "index": 125, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea6_iowr_datamem": { - "index": 126, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea6_gmird_pagemem": { - "index": 127, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea6_gmiwr_pagemem": { - "index": 128, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea6_mam_d0mem": { - "index": 129, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea6_mam_d1mem": { - "index": 130, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea6_mam_d2mem": { - "index": 131, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea6_mam_d3mem": { - "index": 132, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea7_dramrd_cmdmem": { - "index": 133, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea7_dramwr_cmdmem": { - "index": 134, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea7_dramwr_datamem": { - "index": 135, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea7_rret_tagmem": { - "index": 136, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea7_wret_tagmem": { - "index": 137, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea7_gmird_cmdmem": { - "index": 138, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea7_gmiwr_cmdmem": { - "index": 139, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea7_gmiwr_datamem": { - "index": 140, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea7_dramrd_pagemem": { - "index": 141, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea7_dramwr_pagemem": { - "index": 142, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea7_iord_cmdmem": { - "index": 143, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea7_iowr_cmdmem": { - "index": 144, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea7_iowr_datamem": { - "index": 145, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea7_gmird_pagemem": { - "index": 146, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea7_gmiwr_pagemem": { - "index": 147, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea7_mam_d0mem": { - "index": 148, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea7_mam_d1mem": { - "index": 149, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea7_mam_d2mem": { - "index": 150, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mmea7_mam_d3mem": { - "index": 151, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - } - } - }, - "athub": { - "index": 4, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "pcie_bif": { - "index": 5, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "hdp": { - "index": 6, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "xgmi_wafl": { - "index": 7, - "support": 1, - "method" : { - "crc": 2, - "data_parity": 6, - "replay_overflow": 7 - }, - "subblock": { - "xgmi": { - "index": 0, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable" - ] - }, - "wafl": { - "index": 1, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable" - ] - } - } - }, - "df": { - "index": 8, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "smn": { - "index": 9, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "sem": { - "index": 10, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mp0": { - "index": 11, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mp1": { - "index": 12, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "fuse": { - "index": 13, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - } - }, - "tests": [ - { - "name": "ras_umc.0.2", - "block": "umc", - "type": "single_correctable", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "status": "Error count change" - }, - { - "name": "ras_umc.0.4", - "block": "umc", - "type": "multi_uncorrectable", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "status": "GPU Hang" - }, - { - "name": "ras_gfx.0.2", - "block": "gfx", - "subblock": "gfx_cpc_scratch", - "type": "single_correctable", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.0.4", - "block": "gfx", - "subblock": "gfx_cpc_scratch", - "type": "multi_uncorrectable", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.0.8", - "block": "gfx", - "subblock": "gfx_cpc_scratch", - "type": "poison", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.1.2", - "block": "gfx", - "subblock": "gfx_cpc_ucode", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/bin/gc_edc_tc_02.bin", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.1.4", - "block": "gfx", - "subblock": "gfx_cpc_ucode", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.1.8", - "block": "gfx", - "subblock": "gfx_cpc_ucode", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.2.2", - "block": "gfx", - "subblock": "gfx_cpc_me1_dc_state", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.2.4", - "block": "gfx", - "subblock": "gfx_cpc_me1_dc_state", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.2.8", - "block": "gfx", - "subblock": "gfx_cpc_me1_dc_state", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.3.2", - "block": "gfx", - "subblock": "gfx_cpc_me1_dc_csinvoc", - "type": "single_correctable", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "gridX": "4", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.3.4", - "block": "gfx", - "subblock": "gfx_cpc_me1_dc_csinvoc", - "type": "multi_uncorrectable", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "gridX": "4", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.3.8", - "block": "gfx", - "subblock": "gfx_cpc_me1_dc_csinvoc", - "type": "poison", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "gridX": "4", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.4.2", - "block": "gfx", - "subblock": "gfx_cpc_me1_dc_restore", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_dc_restore.bin", - "status": "KFD context save/restore failed" - }, - { - "name": "ras_gfx.4.4", - "block": "gfx", - "subblock": "gfx_cpc_me1_dc_restore", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_dc_restore.bin", - "status": "KFD context save/restore failed" - }, - { - "name": "ras_gfx.4.8", - "block": "gfx", - "subblock": "gfx_cpc_me1_dc_restore", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_dc_restore.bin", - "status": "KFD context save/restore failed" - }, - { - "name": "ras_gfx.5.2", - "block": "gfx", - "subblock": "gfx_cpc_me1_dc_csinvoc1", - "type": "single_correctable", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "gridX": "4", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.5.4", - "block": "gfx", - "subblock": "gfx_cpc_me1_dc_csinvoc1", - "type": "multi_uncorrectable", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "gridX": "4", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.5.8", - "block": "gfx", - "subblock": "gfx_cpc_me1_dc_csinvoc1", - "type": "poison", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "gridX": "4", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.6.1", - "block": "gfx", - "subblock": "gfx_cpc_me2_dc_state", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_dc_restore.bin", - "backend": "kfd", - "queue": "compute", - "skip": "1", - "status": "No EDC counter, no support" - }, - { - "name": "ras_gfx.7.1", - "block": "gfx", - "subblock": "gfx_cpc_me2_dc_csinvoc", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_dc_restore.bin", - "backend": "kfd", - "queue": "compute", - "skip": "1", - "status": "No EDC counter, no support" - }, - { - "name": "ras_gfx.8.1", - "block": "gfx", - "subblock": "gfx_cpc_me2_dc_restore", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_dc_restore.bin", - "backend": "kfd", - "queue": "compute", - "skip": "1", - "status": "No EDC counter, no support" - }, - { - "name": "ras_gfx.9.2", - "block": "gfx", - "subblock": "gfx_cpf_me2_roq", - "type": "single_correctable", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.9.4", - "block": "gfx", - "subblock": "gfx_cpf_me2_roq", - "type": "multi_uncorrectable", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.9.8", - "block": "gfx", - "subblock": "gfx_cpf_me2_roq", - "type": "poison", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.10.2", - "block": "gfx", - "subblock": "gfx_cpf_me1_roq", - "type": "single_correctable", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.10.4", - "block": "gfx", - "subblock": "gfx_cpf_me1_roq", - "type": "multi_uncorrectable", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.10.8", - "block": "gfx", - "subblock": "gfx_cpf_me1_roq", - "type": "poison", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.11.2", - "block": "gfx", - "subblock": "gfx_cpf_tciu_tag", - "type": "single_correctable", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.11.4", - "block": "gfx", - "subblock": "gfx_cpf_tciu_tag", - "type": "multi_uncorrectable", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.11.8", - "block": "gfx", - "subblock": "gfx_cpf_tciu_tag", - "type": "poison", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.12.2", - "block": "gfx", - "subblock": "gfx_gds_mem", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_gds_02.bin", - "addrType": "mtype_gds", - "backend": "drm", - "status": "Fail: No Graphic Ring" - }, - { - "name": "ras_gfx.12.4", - "block": "gfx", - "subblock": "gfx_gds_mem", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_gds_02.bin", - "addrType": "mtype_gds", - "backend": "drm", - "status": "Fail: No Graphic Ring" - }, - { - "name": "ras_gfx.12.8", - "block": "gfx", - "subblock": "gfx_gds_mem", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_gds_02.bin", - "addrType": "mtype_gds", - "backend": "drm", - "status": "Fail: No Graphic Ring" - }, - { - "name": "ras_gfx.13.1", - "block": "gfx", - "subblock": "gfx_gds_input_queue", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_gds_02.bin", - "backend": "drm", - "skip": "1", - "status": "No EDC counter, no support" - }, - { - "name": "ras_gfx.14.2", - "block": "gfx", - "subblock": "gfx_gds_oa_phy_cmd_ram_mem", - "type": "single_correctable", - "gdsOaPhy": "1", - "workGroupSizeX": "64", - "workGroupSizeY": "6", - "backend": "drm", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.14.4", - "block": "gfx", - "subblock": "gfx_gds_oa_phy_cmd_ram_mem", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_gds_oa_phy.bin", - "gdsOaPhy": "1", - "workGroupSizeX": "64", - "workGroupSizeY": "6", - "backend": "drm", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.14.8", - "block": "gfx", - "subblock": "gfx_gds_oa_phy_cmd_ram_mem", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_gds_oa_phy.bin", - "gdsOaPhy": "1", - "workGroupSizeX": "64", - "workGroupSizeY": "6", - "backend": "drm", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.15.2", - "block": "gfx", - "subblock": "gfx_gds_oa_phy_data_ram_mem", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_gds_oa_phy.bin", - "gdsOaPhy": "1", - "workGroupSizeX": "64", - "workGroupSizeY": "6", - "backend": "drm", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.15.4", - "block": "gfx", - "subblock": "gfx_gds_oa_phy_data_ram_mem", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_gds_oa_phy.bin", - "gdsOaPhy": "1", - "workGroupSizeX": "64", - "workGroupSizeY": "6", - "backend": "drm", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.15.8", - "block": "gfx", - "subblock": "gfx_gds_oa_phy_data_ram_mem", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_gds_oa_phy.bin", - "gdsOaPhy": "1", - "workGroupSizeX": "64", - "workGroupSizeY": "6", - "backend": "drm", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.16.2", - "block": "gfx", - "subblock": "gfx_gds_oa_pipe_mem", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_gds_oa_phy.bin", - "gdsOaPhy": "1", - "workGroupSizeX": "64", - "workGroupSizeY": "6", - "backend": "drm", - "status": "Fail" - }, - { - "name": "ras_gfx.16.4", - "block": "gfx", - "subblock": "gfx_gds_oa_pipe_mem", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_gds_oa_phy.bin", - "gdsOaPhy": "1", - "workGroupSizeX": "64", - "workGroupSizeY": "6", - "backend": "drm", - "status": "Fail" - }, - { - "name": "ras_gfx.16.8", - "block": "gfx", - "subblock": "gfx_gds_oa_pipe_mem", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_gds_oa_phy.bin", - "gdsOaPhy": "1", - "workGroupSizeX": "64", - "workGroupSizeY": "6", - "backend": "drm", - "status": "Fail" - }, - { - "name": "ras_gfx.17.2", - "block": "gfx", - "subblock": "gfx_spi_sr_mem", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_compute.bin", - "backend": "kfd", - "queue": "compute", - "gridX": "256", - "workGroupSizeX": "1", - "workGroupSizeY": "1", - "workGroupSizeZ": "1", - "maxQueueNum": "1", - "status": "Fail: The cp might be in an unrecoverable state due to an unsuccessful queues preemption" - }, - { - "name": "ras_gfx.17.2", - "block": "gfx", - "subblock": "gfx_spi_sr_mem", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_compute.bin", - "backend": "kfd", - "queue": "compute", - "gridX": "256", - "workGroupSizeX": "1", - "workGroupSizeY": "1", - "workGroupSizeZ": "1", - "maxQueueNum": "1", - "status": "Fail: The cp might be in an unrecoverable state due to an unsuccessful queues preemption" - }, - { - "name": "ras_gfx.17.2", - "block": "gfx", - "subblock": "gfx_spi_sr_mem", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_compute.bin", - "backend": "kfd", - "queue": "compute", - "gridX": "256", - "workGroupSizeX": "1", - "workGroupSizeY": "1", - "workGroupSizeZ": "1", - "maxQueueNum": "1", - "status": "Fail: The cp might be in an unrecoverable state due to an unsuccessful queues preemption" - }, - { - "name": "ras_gfx.18.2", - "block": "gfx", - "subblock": "gfx_spi_gds_expreq_mem", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_gds_02.bin", - "addrType": "mtype_gds", - "backend": "drm", - "queue": "compute", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.18.4", - "block": "gfx", - "subblock": "gfx_spi_gds_expreq_mem", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_gds_02.bin", - "addrType": "mtype_gds", - "backend": "drm", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.18.8", - "block": "gfx", - "subblock": "gfx_spi_gds_expreq_mem", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_gds_02.bin", - "addrType": "mtype_gds", - "backend": "drm", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.19.2", - "block": "gfx", - "subblock": "gfx_spi_wb_grant_30_mem", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_cp_sq.bin", - "backend": "kfd", - "queue": "compute", - "maxQueueNum": "1", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.19.4", - "block": "gfx", - "subblock": "gfx_spi_wb_grant_30_mem", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_cp_sq.bin", - "backend": "kfd", - "queue": "compute", - "maxQueueNum": "1", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.19.8", - "block": "gfx", - "subblock": "gfx_spi_wb_grant_30_mem", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_cp_sq.bin", - "backend": "kfd", - "queue": "compute", - "maxQueueNum": "1", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.20.2", - "block": "gfx", - "subblock": "gfx_spi_wb_grant_61_mem", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_compute.bin", - "backend": "kfd", - "queue": "compute", - "se0_cu_mask": "1", - "se1_cu_mask": "0", - "se2_cu_mask": "0", - "se3_cu_mask": "0", - "se4_cu_mask": "0", - "se5_cu_mask": "0", - "se6_cu_mask": "0", - "se7_cu_mask": "0", - "simd_mask": "1", - "gridX": "8192", - "workGroupSizeX": "64", - "workGroupSizeY": "4", - "workGroupSizeZ": "1", - "maxQueueNum": "1", - "status": "Fail" - }, - { - "name": "ras_gfx.21.2", - "block": "gfx", - "subblock": "gfx_spi_life_cnt_mem", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_compute.bin", - "gridX": "256", - "gridY": "1", - "gridZ": "1", - "workGroupSizeX": "1", - "workGroupSizeY": "1", - "workGroupSizeZ": "1", - "backend": "kfd", - "queue": "compute", - "maxQueueNum": "1", - "status": "Fail" - }, - { - "name": "ras_gfx.22.2", - "block": "gfx", - "subblock": "gfx_sq_sgpr", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sgpr_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Error count change with self injection, but TA doesn't work" - }, - { - "name": "ras_gfx.23.2", - "block": "gfx", - "subblock": "gfx_sq_lds_d", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_lds_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.23.4", - "block": "gfx", - "subblock": "gfx_sq_lds_d", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_lds_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.23.8", - "block": "gfx", - "subblock": "gfx_sq_lds_d", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_lds_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.24.2", - "block": "gfx", - "subblock": "gfx_sq_lds_i", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_lds_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.24.4", - "block": "gfx", - "subblock": "gfx_sq_lds_i", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_lds_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.24.8", - "block": "gfx", - "subblock": "gfx_sq_lds_i", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_lds_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.25.2", - "block": "gfx", - "subblock": "gfx_sq_vgpr", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_vgpr_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Fail" - }, - { - "name": "ras_gfx.26.2", - "block": "gfx", - "subblock": "gfx_sqc_inst_utcl1_lfifo", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.26.4", - "block": "gfx", - "subblock": "gfx_sqc_inst_utcl1_lfifo", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.26.8", - "block": "gfx", - "subblock": "gfx_sqc_inst_utcl1_lfifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.27.2", - "block": "gfx", - "subblock": "gfx_sqc_data_cu0_write_data_buf", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.27.4", - "block": "gfx", - "subblock": "gfx_sqc_data_cu0_write_data_buf", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.27.8", - "block": "gfx", - "subblock": "gfx_sqc_data_cu0_write_data_buf", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.28.2", - "block": "gfx", - "subblock": "gfx_sqc_data_cu0_utcl1_lfifo", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sh_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "cuIndex": "0", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.28.4", - "block": "gfx", - "subblock": "gfx_sqc_data_cu0_utcl1_lfifo", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sh_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "cuIndex": "0", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.28.8", - "block": "gfx", - "subblock": "gfx_sqc_data_cu0_utcl1_lfifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sh_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "cuIndex": "0", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.29.2", - "block": "gfx", - "subblock": "gfx_sqc_data_cu1_write_data_buf", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "cuIndex": "1", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.29.4", - "block": "gfx", - "subblock": "gfx_sqc_data_cu1_write_data_buf", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "cuIndex": "1", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.29.8", - "block": "gfx", - "subblock": "gfx_sqc_data_cu1_write_data_buf", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "cuIndex": "1", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.30.2", - "block": "gfx", - "subblock": "gfx_sqc_data_cu1_utcl1_lfifo", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sh_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "cuIndex": "1", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.30.4", - "block": "gfx", - "subblock": "gfx_sqc_data_cu1_utcl1_lfifo", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sh_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "cuIndex": "1", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.30.8", - "block": "gfx", - "subblock": "gfx_sqc_data_cu1_utcl1_lfifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sh_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "cuIndex": "1", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.31.2", - "block": "gfx", - "subblock": "gfx_sqc_data_cu2_write_data_buf", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "cuIndex": "2", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.31.4", - "block": "gfx", - "subblock": "gfx_sqc_data_cu2_write_data_buf", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "cuIndex": "2", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.31.8", - "block": "gfx", - "subblock": "gfx_sqc_data_cu2_write_data_buf", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "cuIndex": "2", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.32.2", - "block": "gfx", - "subblock": "gfx_sqc_data_cu2_utcl1_lfifo", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sh_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "cuIndex": "2", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.32.4", - "block": "gfx", - "subblock": "gfx_sqc_data_cu2_utcl1_lfifo", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sh_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "cuIndex": "2", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.32.8", - "block": "gfx", - "subblock": "gfx_sqc_data_cu2_utcl1_lfifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sh_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "cuIndex": "2", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.33.2", - "block": "gfx", - "subblock": "gfx_sqc_banka_inst_tag_ram", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank.bin", - "backend": "kfd", - "queue": "compute", - "internal": "1", - "status": "Fail" - }, - { - "name": "ras_gfx.34.2", - "block": "gfx", - "subblock": "gfx_sqc_banka_inst_utcl1_miss_fifo", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.34.4", - "block": "gfx", - "subblock": "gfx_sqc_banka_inst_utcl1_miss_fifo", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.34.8", - "block": "gfx", - "subblock": "gfx_sqc_banka_inst_utcl1_miss_fifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.35.2", - "block": "gfx", - "subblock": "gfx_sqc_banka_inst_miss_fifo", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.35.4", - "block": "gfx", - "subblock": "gfx_sqc_banka_inst_miss_fifo", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.35.8", - "block": "gfx", - "subblock": "gfx_sqc_banka_inst_miss_fifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.36.2", - "block": "gfx", - "subblock": "gfx_sqc_banka_inst_bank_ram", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.36.4", - "block": "gfx", - "subblock": "gfx_sqc_banka_inst_bank_ram", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.36.8", - "block": "gfx", - "subblock": "gfx_sqc_banka_inst_bank_ram", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.37.2", - "block": "gfx", - "subblock": "gfx_sqc_banka_data_tag_ram", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_data_bank_tag.bin", - "backend": "kfd", - "queue": "compute", - "status": "Error count change with self injection, but TA doesn't work" - }, - { - "name": "ras_gfx.38.2", - "block": "gfx", - "subblock": "gfx_sqc_banka_data_hit_fifo", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_data_bank_tag.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.38.4", - "block": "gfx", - "subblock": "gfx_sqc_banka_data_hit_fifo", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_data_bank_tag.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.38.8", - "block": "gfx", - "subblock": "gfx_sqc_banka_data_hit_fifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_data_bank_tag.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.39.2", - "block": "gfx", - "subblock": "gfx_sqc_banka_data_miss_fifo", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.39.4", - "block": "gfx", - "subblock": "gfx_sqc_banka_data_miss_fifo", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.39.8", - "block": "gfx", - "subblock": "gfx_sqc_banka_data_miss_fifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.40.1", - "block": "gfx", - "subblock": "gfx_sqc_banka_data_dirty_bit_ram", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_dirty_bit.bin", - "backend": "kfd", - "queue": "compute", - "maxQueueNum": "1", - "addrType": "mtype_uc_sys", - "skip": "1", - "status": "N/A" - }, - { - "name": "ras_gfx.41.2", - "block": "gfx", - "subblock": "gfx_sqc_banka_data_bank_ram", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.41.4", - "block": "gfx", - "subblock": "gfx_sqc_banka_data_bank_ram", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.41.8", - "block": "gfx", - "subblock": "gfx_sqc_banka_data_bank_ram", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.42.2", - "block": "gfx", - "subblock": "gfx_sqc_bankb_inst_tag_ram", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank.bin", - "backend": "kfd", - "queue": "compute", - "status": "Error count change with self injection, but TA doesn't work" - }, - { - "name": "ras_gfx.43.2", - "block": "gfx", - "subblock": "gfx_sqc_bankb_inst_utcl1_miss_fifo", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.43.4", - "block": "gfx", - "subblock": "gfx_sqc_bankb_inst_utcl1_miss_fifo", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.43.8", - "block": "gfx", - "subblock": "gfx_sqc_bankb_inst_utcl1_miss_fifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.44.2", - "block": "gfx", - "subblock": "gfx_sqc_bankb_inst_miss_fifo", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.44.4", - "block": "gfx", - "subblock": "gfx_sqc_bankb_inst_miss_fifo", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.44.8", - "block": "gfx", - "subblock": "gfx_sqc_bankb_inst_miss_fifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.45.2", - "block": "gfx", - "subblock": "gfx_sqc_bankb_inst_bank_ram", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.45.4", - "block": "gfx", - "subblock": "gfx_sqc_bankb_inst_bank_ram", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.45.8", - "block": "gfx", - "subblock": "gfx_sqc_bankb_inst_bank_ram", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.46.2", - "block": "gfx", - "subblock": "gfx_sqc_bankb_data_tag_ram", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_data_bank_tag.bin", - "backend": "kfd", - "queue": "compute", - "status": "Error count change with self injection, but TA doesn't work" - }, - { - "name": "ras_gfx.47.2", - "block": "gfx", - "subblock": "gfx_sqc_bankb_data_hit_fifo", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_data_bank_tag.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.47.4", - "block": "gfx", - "subblock": "gfx_sqc_bankb_data_hit_fifo", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_data_bank_tag.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.47.8", - "block": "gfx", - "subblock": "gfx_sqc_bankb_data_hit_fifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_data_bank_tag.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.48.2", - "block": "gfx", - "subblock": "gfx_sqc_bankb_data_miss_fifo", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.48.4", - "block": "gfx", - "subblock": "gfx_sqc_bankb_data_miss_fifo", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.48.8", - "block": "gfx", - "subblock": "gfx_sqc_bankb_data_miss_fifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.49.1", - "block": "gfx", - "subblock": "gfx_sqc_bankb_data_dirty_bit_ram", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_dirty_bit.bin", - "backend": "kfd", - "queue": "compute", - "maxQueueNum": "1", - "addrType": "mtype_uc_sys", - "skip": "1", - "status": "N/A" - }, - { - "name": "ras_gfx.50.2", - "block": "gfx", - "subblock": "gfx_sqc_bankb_data_bank_ram", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.50.4", - "block": "gfx", - "subblock": "gfx_sqc_bankb_data_bank_ram", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.50.8", - "block": "gfx", - "subblock": "gfx_sqc_bankb_data_bank_ram", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.51.2", - "block": "gfx", - "subblock": "gfx_ta_fs_dfifo", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.51.4", - "block": "gfx", - "subblock": "gfx_ta_fs_dfifo", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.51.8", - "block": "gfx", - "subblock": "gfx_ta_fs_dfifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.52.2", - "block": "gfx", - "subblock": "gfx_ta_fs_afifo", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.52.4", - "block": "gfx", - "subblock": "gfx_ta_fs_afifo", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.52.8", - "block": "gfx", - "subblock": "gfx_ta_fs_afifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.53.2", - "block": "gfx", - "subblock": "gfx_ta_fl_lfifo", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.53.4", - "block": "gfx", - "subblock": "gfx_ta_fl_lfifo", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.53.8", - "block": "gfx", - "subblock": "gfx_ta_fl_lfifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.54.2", - "block": "gfx", - "subblock": "gfx_ta_fx_lfifo", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.54.4", - "block": "gfx", - "subblock": "gfx_ta_fx_lfifo", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.54.8", - "block": "gfx", - "subblock": "gfx_ta_fx_lfifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.55.2", - "block": "gfx", - "subblock": "gfx_ta_fs_cfifo", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.55.4", - "block": "gfx", - "subblock": "gfx_ta_fs_cfifo", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.55.8", - "block": "gfx", - "subblock": "gfx_ta_fs_cfifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.56.2", - "block": "gfx", - "subblock": "gfx_tca_hole_fifo", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.56.4", - "block": "gfx", - "subblock": "gfx_tca_hole_fifo", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.56.8", - "block": "gfx", - "subblock": "gfx_tca_hole_fifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.57.1", - "block": "gfx", - "subblock": "gfx_tca_req_fifo", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "maxQueueNum": "20", - "workGroupSizeX": "256", - "gridX": "16", - "COMPUTE_STATIC_THREAD_MGMT_SE0_sh0_cu_en": "255", - "status": "Fail" - }, - { - "name": "ras_gfx.58.2", - "block": "gfx", - "subblock": "gfx_tcc_cache_data", - "type": "single_correctable", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.58.4", - "block": "gfx", - "subblock": "gfx_tcc_cache_data", - "type": "multi_uncorrectable", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.58.8", - "block": "gfx", - "subblock": "gfx_tcc_cache_data", - "type": "poison", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.59.2", - "block": "gfx", - "subblock": "gfx_tcc_cache_data_bank_0_1", - "type": "single_correctable", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.59.4", - "block": "gfx", - "subblock": "gfx_tcc_cache_data_bank_0_1", - "type": "multi_uncorrectable", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.59.8", - "block": "gfx", - "subblock": "gfx_tcc_cache_data_bank_0_1", - "type": "poison", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.60.2", - "block": "gfx", - "subblock": "gfx_tcc_cache_data_bank_1_0", - "type": "single_correctable", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.60.4", - "block": "gfx", - "subblock": "gfx_tcc_cache_data_bank_1_0", - "type": "multi_uncorrectable", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.60.8", - "block": "gfx", - "subblock": "gfx_tcc_cache_data_bank_1_0", - "type": "poison", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.61.2", - "block": "gfx", - "subblock": "gfx_tcc_cache_data_bank_1_1", - "type": "single_correctable", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.61.4", - "block": "gfx", - "subblock": "gfx_tcc_cache_data_bank_1_1", - "type": "multi_uncorrectable", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.61.8", - "block": "gfx", - "subblock": "gfx_tcc_cache_data_bank_1_1", - "type": "poison", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.62.2", - "block": "gfx", - "subblock": "gfx_tcc_cache_dirty_bank_0", - "type": "single_correctable", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Error count change with internal injection" - }, - { - "name": "ras_gfx.63.2", - "block": "gfx", - "subblock": "gfx_tcc_cache_dirty_bank_1", - "type": "single_correctable", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Error count change with internal injection" - }, - { - "name": "ras_gfx.64.2", - "block": "gfx", - "subblock": "gfx_tcc_high_rate_tag", - "type": "single_correctable", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.64.4", - "block": "gfx", - "subblock": "gfx_tcc_high_rate_tag", - "type": "multi_uncorrectable", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.64.8", - "block": "gfx", - "subblock": "gfx_tcc_high_rate_tag", - "type": "poison", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.65.2", - "block": "gfx", - "subblock": "gfx_tcc_low_rate_tag", - "type": "single_correctable", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.65.4", - "block": "gfx", - "subblock": "gfx_tcc_low_rate_tag", - "type": "multi_uncorrectable", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.65.8", - "block": "gfx", - "subblock": "gfx_tcc_low_rate_tag", - "type": "poison", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.66.2", - "block": "gfx", - "subblock": "gfx_tcc_in_use_dec", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.66.4", - "block": "gfx", - "subblock": "gfx_tcc_in_use_dec", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.66.8", - "block": "gfx", - "subblock": "gfx_tcc_in_use_dec", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.67.2", - "block": "gfx", - "subblock": "gfx_tcc_in_use_transfer", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.67.4", - "block": "gfx", - "subblock": "gfx_tcc_in_use_transfer", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.67.8", - "block": "gfx", - "subblock": "gfx_tcc_in_use_transfer", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.68.2", - "block": "gfx", - "subblock": "gfx_tcc_return_data", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.68.4", - "block": "gfx", - "subblock": "gfx_tcc_return_data", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.68.8", - "block": "gfx", - "subblock": "gfx_tcc_return_data", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.69.1", - "block": "gfx", - "subblock": "gfx_tcc_return_control", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Fail" - }, - { - "name": "ras_gfx.70.1", - "block": "gfx", - "subblock": "gfx_tcc_uc_atomic_fifo", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "GPU hang with error count change, with internal injection" - }, - { - "name": "ras_gfx.71.1", - "block": "gfx", - "subblock": "gfx_tcc_write_return", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "internal": "1", - "status": "GPU hang with error count change, with internal injection" - }, - { - "name": "ras_gfx.72.1", - "block": "gfx", - "subblock": "gfx_tcc_write_cache_read", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "GPU hang with error count change, with internal injection" - }, - { - "name": "ras_gfx.73.2", - "block": "gfx", - "subblock": "gfx_tcc_src_fifo", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Error count change with internal injection" - }, - { - "name": "ras_gfx.74.1", - "block": "gfx", - "subblock": "gfx_tcc_cache_tag_probe_fifo", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Error count change with internal injection" - }, - { - "name": "ras_gfx.75.2", - "block": "gfx", - "subblock": "gfx_tcc_latency_fifo", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "5", - "loop": "128", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.75.4", - "block": "gfx", - "subblock": "gfx_tcc_latency_fifo", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "5", - "loop": "128", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.75.8", - "block": "gfx", - "subblock": "gfx_tcc_latency_fifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "5", - "loop": "128", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.76.1", - "block": "gfx", - "subblock": "gfx_tcc_latency_fifo_next_ram", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "maxQueueNum": "1", - "workGroupSizeX": "64", - "gridX": "4", - "addrType": "fb", - "loop": "16", - "status": "GPU hang without error count change, TA inject failed, should check with FW team" - }, - { - "name": "ras_gfx.77.1", - "block": "gfx", - "subblock": "gfx_tci_write_ram", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "status": "GPU hang without error count change, TA inject failed, should check with FW team" - }, - { - "name": "ras_gfx.78.2", - "block": "gfx", - "subblock": "gfx_tcp_cache_ram", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Error count change with glmark2" - }, - { - "name": "ras_gfx.79.2", - "block": "gfx", - "subblock": "gfx_tcp_lfifo_ram", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.79.4", - "block": "gfx", - "subblock": "gfx_tcp_lfifo_ram", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.79.8", - "block": "gfx", - "subblock": "gfx_tcp_lfifo_ram", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.80.2", - "block": "gfx", - "subblock": "gfx_tcp_cmd_fifo", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.80.4", - "block": "gfx", - "subblock": "gfx_tcp_cmd_fifo", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.80.8", - "block": "gfx", - "subblock": "gfx_tcp_cmd_fifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.81.2", - "block": "gfx", - "subblock": "gfx_tcp_vm_fifo", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.81.4", - "block": "gfx", - "subblock": "gfx_tcp_vm_fifo", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.81.8", - "block": "gfx", - "subblock": "gfx_tcp_vm_fifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.82.1", - "block": "gfx", - "subblock": "gfx_tcp_db_ram", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "GPU hang with error count change, with glmark2" - }, - { - "name": "ras_gfx.83.2", - "block": "gfx", - "subblock": "gfx_tcp_utcl1_lfifo0", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tcp_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.83.4", - "block": "gfx", - "subblock": "gfx_tcp_utcl1_lfifo0", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tcp_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.83.8", - "block": "gfx", - "subblock": "gfx_tcp_utcl1_lfifo0", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tcp_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.84.2", - "block": "gfx", - "subblock": "gfx_tcp_utcl1_lfifo1", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tcp_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.84.4", - "block": "gfx", - "subblock": "gfx_tcp_utcl1_lfifo1", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tcp_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.84.8", - "block": "gfx", - "subblock": "gfx_tcp_utcl1_lfifo1", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tcp_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.85.2", - "block": "gfx", - "subblock": "gfx_td_ss_fifo_lo", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "loop": "10", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.85.4", - "block": "gfx", - "subblock": "gfx_td_ss_fifo_lo", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "loop": "10", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.85.8", - "block": "gfx", - "subblock": "gfx_td_ss_fifo_lo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "loop": "10", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.86.2", - "block": "gfx", - "subblock": "gfx_td_ss_fifo_hi", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "loop": "10", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.86.4", - "block": "gfx", - "subblock": "gfx_td_ss_fifo_hi", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "loop": "10", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.86.8", - "block": "gfx", - "subblock": "gfx_td_ss_fifo_hi", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "loop": "10", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.87.2", - "block": "gfx", - "subblock": "gfx_td_cs_fifo", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "loop": "10", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.87.4", - "block": "gfx", - "subblock": "gfx_td_cs_fifo", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "loop": "10", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.87.8", - "block": "gfx", - "subblock": "gfx_td_cs_fifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "loop": "10", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.88.2", - "block": "gfx", - "subblock": "gfx_ea_dramrd_cmdmem", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_ea.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.88.4", - "block": "gfx", - "subblock": "gfx_ea_dramrd_cmdmem", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_ea.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.88.8", - "block": "gfx", - "subblock": "gfx_ea_dramrd_cmdmem", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_ea.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.89.2", - "block": "gfx", - "subblock": "gfx_ea_dramwr_cmdmem", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_ea_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "4", - "status": "Error count change" - }, - { - "name": "ras_gfx.90.2", - "block": "gfx", - "subblock": "gfx_ea_dramwr_datamem", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "4", - "internal": "1", - "status": "Error count change" - }, - { - "name": "ras_gfx.91.2", - "block": "gfx", - "subblock": "gfx_ea_rret_tagmem", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "4", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.91.4", - "block": "gfx", - "subblock": "gfx_ea_rret_tagmem", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "4", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.91.8", - "block": "gfx", - "subblock": "gfx_ea_rret_tagmem", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "4", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.92.2", - "block": "gfx", - "subblock": "gfx_ea_wret_tagmem", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "4", - "status": "Fail" - }, - { - "name": "ras_gfx.93.2", - "block": "gfx", - "subblock": "gfx_ea_gmird_cmdmem", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "256", - "gridX": "64", - "addrType": "sys", - "status": "Fail" - }, - { - "name": "ras_gfx.94.2", - "block": "gfx", - "subblock": "gfx_ea_gmiwr_cmdmem", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02_ea.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "256", - "gridX": "64", - "addrType": "sys", - "status": "Fail" - }, - { - "name": "ras_gfx.95.2", - "block": "gfx", - "subblock": "gfx_ea_gmiwr_datamem", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "256", - "gridX": "64", - "addrType": "sys", - "status": "Fail" - }, - { - "name": "ras_gfx.96.1", - "block": "gfx", - "subblock": "gfx_ea_dramrd_pagemem", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_ea_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "256", - "gridX": "5", - "addrType": "mtype_uc_vid", - "status": "Fail" - }, - { - "name": "ras_gfx.97.1", - "block": "gfx", - "subblock": "gfx_ea_dramwr_pagemem", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_ea_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Fail" - }, - { - "name": "ras_gfx.98.1", - "block": "gfx", - "subblock": "gfx_ea_iord_cmdmem", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "4", - "addrType": "sys", - "status": "Fail" - }, - { - "name": "ras_gfx.99.1", - "block": "gfx", - "subblock": "gfx_ea_iowr_cmdmem", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "4", - "addrType": "sys", - "status": "Fail" - }, - { - "name": "ras_gfx.100.1", - "block": "gfx", - "subblock": "gfx_ea_iowr_datamem", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "4", - "addrType": "sys", - "status": "Fail" - }, - { - "name": "ras_gfx.101.1", - "block": "gfx", - "subblock": "gfx_ea_gmird_pagemem", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "256", - "gridX": "2", - "addrType": "sys", - "status": "Faild" - }, - { - "name": "ras_gfx.102.1", - "block": "gfx", - "subblock": "gfx_ea_gmiwr_pagemem", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "256", - "gridX": "8", - "addrType": "sys", - "status": "Faild" - }, - { - "name": "ras_gfx.103.1", - "block": "gfx", - "subblock": "gfx_ea_mam_d0mem", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_ea_dirty.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "addrType": "fb", - "internal": "0", - "status": "Failed" - }, - { - "name": "ras_gfx.104.1", - "block": "gfx", - "subblock": "gfx_ea_mam_d1mem", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_ea_dirty.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "addrType": "fb", - "internal": "0", - "status": "Failed" - }, - { - "name": "ras_gfx.105.1", - "block": "gfx", - "subblock": "gfx_ea_mam_d2mem", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_ea_dirty.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "addrType": "fb", - "internal": "0", - "status": "Failed" - }, - { - "name": "ras_gfx.106.1", - "block": "gfx", - "subblock": "gfx_ea_mam_d3mem", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_ea_dirty.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "addrType": "fb", - "internal": "0", - "status": "Failed" - }, - { - "name": "ras_gfx.107.2", - "block": "gfx", - "subblock": "gfx_ea_mam_a0mem", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_ea_dirty.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "addrType": "fb", - "status": "Fail" - }, - { - "name": "ras_gfx.108.2", - "block": "gfx", - "subblock": "gfx_ea_mam_a1mem", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_ea_dirty.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "addrType": "fb", - "status": "Fail" - }, - { - "name": "ras_gfx.109.2", - "block": "gfx", - "subblock": "gfx_ea_mam_a2mem", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_ea_dirty.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "addrType": "fb", - "status": "Fail" - }, - { - "name": "ras_gfx.110.2", - "block": "gfx", - "subblock": "gfx_ea_mam_a3mem", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_ea_dirty.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "addrType": "fb", - "status": "Fail" - }, - { - "name": "ras_gfx.111.2", - "block": "gfx", - "subblock": "gfx_ea_mam_afmem", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_ea_dirty.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "addrType": "fb", - "status": "Fail" - }, - { - "name": "ras_gfx.112.2", - "block": "gfx", - "subblock": "utc_vml2_bank_cache", - "type": "single_correctable", - "backend": "kfd", - "queue": "compute", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.112.4", - "block": "gfx", - "subblock": "utc_vml2_bank_cache", - "type": "multi_uncorrectable", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.112.8", - "block": "gfx", - "subblock": "utc_vml2_bank_cache", - "type": "poison", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.113.2", - "block": "gfx", - "subblock": "utc_vml2_walker", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_utc_vmwalker_pde_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "1", - "loop": "4", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.113.4", - "block": "gfx", - "subblock": "utc_vml2_walker", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_utc_vmwalker_pde_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "1", - "loop": "4", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.113.8", - "block": "gfx", - "subblock": "utc_vml2_walker", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_utc_vmwalker_pde_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "1", - "loop": "4", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.114.2", - "block": "gfx", - "subblock": "utcl2_router", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_utc_vmwalker_pde_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "1", - "loop": "4", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.115.2", - "block": "gfx", - "subblock": "utc_atcl2_cache_2m_bank", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "256", - "gridX": "64", - "internal": "0", - "loop": "4", - "addrType": "atcMem", - "status": "Failed" - }, - { - "name": "ras_gfx.116.2", - "block": "gfx", - "subblock": "utc_atcl2_cache_4k_bank", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "256", - "gridX": "64", - "internal": "0", - "loop": "4", - "addrType": "atcMem", - "status": "Failed" - }, - { - "name": "ras_gfx.117.2", - "block": "gfx", - "subblock": "rlcg_instr_ram", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.117.4", - "block": "gfx", - "subblock": "rlcg_instr_ram", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.117.8", - "block": "gfx", - "subblock": "rlcg_instr_ram", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: GPU hang with uncorrectable error & ERREVENT_ATHUB_INTERRUPT" - }, - { - "name": "ras_gfx.118.2", - "block": "gfx", - "subblock": "rlcg_scratch_ram", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Success: with correctable error" - }, - { - "name": "ras_gfx.119.2", - "block": "gfx", - "subblock": "rlcv_instr_ram", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_vgpr_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Fail" - }, - { - "name": "ras_gfx.120.2", - "block": "gfx", - "subblock": "rlcv_scratch_ram", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Fail" - }, - { - "name": "ras_gfx.121.2", - "block": "gfx", - "subblock": "rlc_tctag_ram", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Fail" - }, - { - "name": "ras_gfx.122.2", - "block": "gfx", - "subblock": "rlc_spm_scratch_ram", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Fail" - }, - { - "name": "ras_gfx.123.2", - "block": "gfx", - "subblock": "rlc_srm_data_ram", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Fail" - }, - { - "name": "ras_gfx.124.2", - "block": "gfx", - "subblock": "rlc_srm_addr_ram", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Fail" - }, - { - "name": "ras_gfx.125.2", - "block": "gfx", - "subblock": "rlc_spm_se0_scratch_ram", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Fail" - }, - { - "name": "ras_gfx.126.2", - "block": "gfx", - "subblock": "rlc_spm_se1_scratch_ram", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Fail" - }, - { - "name": "ras_gfx.127.2", - "block": "gfx", - "subblock": "rlc_spm_se2_scratch_ram", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Fail" - }, - { - "name": "ras_gfx.128.2", - "block": "gfx", - "subblock": "rlc_spm_se3_scratch_ram", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Fail" - }, - { - "name": "ras_gfx.129.2", - "block": "gfx", - "subblock": "rlc_spm_se4_scratch_ram", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Fail" - }, - { - "name": "ras_gfx.130.2", - "block": "gfx", - "subblock": "rlc_spm_se5_scratch_ram", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Fail" - }, - { - "name": "ras_gfx.131.2", - "block": "gfx", - "subblock": "rlc_spm_se6_scratch_ram", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Fail" - }, - { - "name": "ras_gfx.132.2", - "block": "gfx", - "subblock": "rlc_spm_se7_scratch_ram", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Fail" - }, - { - "name": "ras_mmhub.0.4", - "block": "mmhub", - "type": "multi_uncorrectable", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "status": "GPU Hang" - }, - { - "name": "ras_pcie_bif.0.2", - "block": "pcie_bif", - "type": "single_correctable", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "status": "Error count change" - }, - { - "name": "ras_pcie_bif.0.4", - "block": "pcie_bif", - "type": "multi_uncorrectable", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "status": "GPU Hang" - }, - { - "name": "xgmi_wafl.0.2", - "block": "xgmi_wafl", - "subblock": "xgmi", - "type": "single_correctable", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "status": "Error count change" - }, - { - "name": "xgmi_wafl.0.4", - "block": "xgmi_wafl", - "subblock": "xgmi", - "type": "multi_uncorrectable", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "status": "GPU Hang" - }, - { - "name": "xgmi_wafl.1.2", - "block": "xgmi_wafl", - "subblock": "wafl", - "type": "single_correctable", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "status": "Error count change" - }, - { - "name": "xgmi_wafl.1.4", - "block": "xgmi_wafl", - "subblock": "wafl", - "type": "multi_uncorrectable", - "nullDispatchCS": "sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin", - "status": "GPU Hang" - } - ] -} \ No newline at end of file diff --git a/ras_prebuild/config/config.json b/ras_prebuild/config/config.json deleted file mode 100644 index 5c3588bdda..0000000000 --- a/ras_prebuild/config/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "version": "0.0.1", - "devices": [ - { - "name": "VEGA20", - "ids": [ "0x66A0", "0x66A1", "0x66A2", "0x66A3", "0x66A4", "0x66A7", "0x66AF" ], - "config": "vega20.json", - "gfx": "libgfx9.so", - "sdma": "libsdma4.so" - }, - { - "name": "ARCTURUS", - "ids": [ "0x738C", "0x7388", "0x738E" ], - "config": "arcturus.json", - "gfx": "libgfx9.so", - "sdma": "libsdma4.so" - }, - { - "name": "SIENNA_CICHLID", - "ids": [ "0x73A0", "0x73A2", "0x73A3", "0x73AB", "0x73AE", "0x73BF" ], - "config": "sienna_cichlid.json", - "gfx": "libgfx10.so", - "sdma": "libsdma5.so" - } - ] -} \ No newline at end of file diff --git a/ras_prebuild/config/sienna_cichlid.json b/ras_prebuild/config/sienna_cichlid.json deleted file mode 100644 index c250304c8e..0000000000 --- a/ras_prebuild/config/sienna_cichlid.json +++ /dev/null @@ -1,34 +0,0 @@ -{ - "version": "0.0.1", - "type": { - "parity": 1, - "single_correctable": 2, - "multi_uncorrectable": 4, - "poison": 8 - }, - "block": { - "umc": { - "index": 0, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - } - }, - "tests": [ - { - "name": "ras_umc.0.2", - "block": "umc", - "type": "single_correctable", - "nullDispatchCS": "sp3/gfx10/edc/bin/sienna_cichlid/gc_edc_sqc_inst_bank_snop.bin" - }, - { - "name": "ras_umc.0.4", - "block": "umc", - "type": "multi_uncorrectable", - "nullDispatchCS": "sp3/gfx10/edc/bin/sienna_cichlid/gc_edc_sqc_inst_bank_snop.bin" - } - ] -} \ No newline at end of file diff --git a/ras_prebuild/config/vega20.json b/ras_prebuild/config/vega20.json deleted file mode 100644 index 3576418dce..0000000000 --- a/ras_prebuild/config/vega20.json +++ /dev/null @@ -1,4393 +0,0 @@ -{ - "version": "0.0.1", - "type": { - "parity": 1, - "single_correctable": 2, - "multi_uncorrectable": 4, - "poison": 8 - }, - "block": { - "umc": { - "index": 0, - "support": 1, - "type": [ - "parity", - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "sdma": { - "index": 1, - "support": 1, - "type": [ - "parity", - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx": { - "index": 2, - "support": 1, - "subblock": { - "gfx_cpc_scratch": { - "index": 0, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_cpc_ucode": { - "index": 1, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_dc_state_me1": { - "index": 2, - "support": 1, - "type": [ - "parity", - "poison" - ] - }, - "gfx_dc_csinvoc_me1": { - "index": 3, - "support": 1, - "type": [ - "parity", - "poison" - ] - }, - "gfx_dc_restore_me1": { - "index": 4, - "support": 1, - "type": [ - "parity", - "poison" - ] - }, - "gfx_dc_state_me2": { - "index": 5, - "support": 0, - "type": [ - "parity", - "poison" - ] - }, - "gfx_dc_csinvoc_me2": { - "index": 6, - "support": 0, - "type": [ - "parity", - "poison" - ] - }, - "gfx_dc_restore_me2": { - "index": 7, - "support": 0, - "type": [ - "parity", - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_cpf_roq_me2": { - "index": 8, - "support": 1, - "type": [ - "parity", - "poison" - ] - }, - "gfx_cpf_roq_me1": { - "index": 9, - "support": 1, - "type": [ - "parity", - "poison" - ] - }, - "gfx_cpf_tag": { - "index": 10, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_cpg_dma_roq": { - "index": 11, - "support": 1, - "type": [ - "parity", - "poison" - ] - }, - "gfx_cpg_dma_tag": { - "index": 12, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_cpg_tag": { - "index": 13, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_gds_mem": { - "index": 14, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_gds_input_queue": { - "index": 15, - "support": 1, - "type": [ - "parity", - "poison" - ] - }, - "gfx_gds_oa_phy_cmd_ram_mem": { - "index": 16, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_gds_oa_phy_data_ram_mem": { - "index": 17, - "support": 1, - "type": [ - "parity", - "poison" - ] - }, - "gfx_gds_oa_pipe_mem": { - "index": 18, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_spi_sr_mem": { - "index": 19, - "support": 1, - "type": [ - "parity", - "poison" - ] - }, - "gfx_sq_sgpr": { - "index": 20, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sq_lds_d": { - "index": 21, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sq_lds_i": { - "index": 22, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sq_vgpr": { - "index": 23, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sqc_inst_utcl1_lfifo": { - "index": 24, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sqc_data_cu0_write_data_buf": { - "index": 25, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sqc_data_cu0_utcl1_lfifo": { - "index": 26, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sqc_data_cu1_write_data_buf": { - "index": 27, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sqc_data_cu1_utcl1_lfifo": { - "index": 28, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sqc_data_cu2_write_data_buf": { - "index": 29, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sqc_data_cu2_utcl1_lfifo": { - "index": 30, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sqc_inst_banka_tag_ram": { - "index": 31, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sqc_inst_banka_utcl1_miss_fifo": { - "index": 32, - "support": 1, - "type": [ - "parity", - "poison" - ] - }, - "gfx_sqc_inst_banka_miss_fifo": { - "index": 33, - "support": 1, - "type": [ - "parity", - "poison" - ] - }, - "gfx_sqc_inst_banka_bank_ram": { - "index": 34, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sqc_data_banka_tag_ram": { - "index": 35, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sqc_data_banka_hit_fifo": { - "index": 36, - "support": 1, - "type": [ - "parity", - "poison" - ] - }, - "gfx_sqc_data_banka_miss_fifo": { - "index": 37, - "support": 1, - "type": [ - "parity", - "poison" - ] - }, - "gfx_sqc_data_banka_dirty_bit_ram": { - "index": 38, - "support": 1, - "type": [ - "parity", - "poison" - ] - }, - "gfx_sqc_data_banka_bank_ram": { - "index": 39, - "support": 1, - "type": [ - "parity", - "poison" - ] - }, - "gfx_sqc_inst_bankb_tag_ram": { - "index": 40, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sqc_inst_bankb_utcl1_miss_fifo": { - "index": 41, - "support": 1, - "type": [ - "parity", - "poison" - ] - }, - "gfx_sqc_inst_bankb_miss_fifo": { - "index": 42, - "support": 1, - "type": [ - "parity", - "poison" - ] - }, - "gfx_sqc_inst_bankb_bank_ram": { - "index": 43, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sqc_data_bankb_tag_ram": { - "index": 44, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_sqc_data_bankb_hit_fifo": { - "index": 45, - "support": 1, - "type": [ - "parity", - "poison" - ] - }, - "gfx_sqc_data_bankb_miss_fifo": { - "index": 46, - "support": 1, - "type": [ - "parity", - "poison" - ] - }, - "gfx_sqc_data_bankb_dirty_bit_ram": { - "index": 47, - "support": 1, - "type": [ - "parity", - "poison" - ] - }, - "gfx_sqc_data_bankb_bank_ram": { - "index": 48, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_ta_fs_dfifo": { - "index": 49, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_ta_fs_afifo": { - "index": 50, - "support": 1, - "type": [ - "parity", - "poison" - ] - }, - "gfx_ta_fl_lfifo": { - "index": 51, - "support": 1, - "type": [ - "parity", - "poison" - ] - }, - "gfx_ta_fx_lfifo": { - "index": 52, - "support": 1, - "type": [ - "parity", - "poison" - ] - }, - "gfx_ta_fs_cfifo": { - "index": 53, - "support": 1, - "type": [ - "parity", - "poison" - ] - }, - "gfx_tca_hole_fifo": { - "index": 54, - "support": 1, - "type": [ - "parity", - "poison" - ] - }, - "gfx_tca_req_fifo": { - "index": 55, - "support": 0, - "type": [ - "parity", - "poison" - ] - }, - "gfx_tcc_cache_data": { - "index": 56, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_tcc_cache_data_bank_0_1": { - "index": 57, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_tcc_cache_data_bank_1_0": { - "index": 58, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_tcc_cache_data_bank_1_1": { - "index": 59, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_tcc_cache_dirty_bank_0": { - "index": 60, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_tcc_cache_dirty_bank_1": { - "index": 61, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_tcc_high_rate_tag": { - "index": 62, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_tcc_low_rate_tag": { - "index": 63, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_tcc_in_use_dec": { - "index": 64, - "support": 1, - "type": [ - "parity", - "poison" - ] - }, - "gfx_tcc_in_use_transfer": { - "index": 65, - "support": 1, - "type": [ - "parity", - "poison" - ] - }, - "gfx_tcc_return_data": { - "support": 1, - "index": 66, - "type": [ - "parity", - "poison" - ] - }, - "gfx_tcc_return_control": { - "index": 67, - "support": 1, - "type": [ - "parity", - "poison" - ] - }, - "gfx_tcc_uc_atomic_fifo": { - "index": 68, - "support": 1, - "type": [ - "parity", - "poison" - ] - }, - "gfx_tcc_write_return": { - "index": 69, - "support": 1, - "type": [ - "parity", - "poison" - ] - }, - "gfx_tcc_write_cache_read": { - "index": 70, - "support": 1, - "type": [ - "parity", - "poison" - ] - }, - "gfx_tcc_src_fifo": { - "index": 71, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_tcc_src_fifo_next_ram": { - "index": 72, - "support": 1, - "type": [ - "parity", - "poison" - ] - }, - "gfx_tcc_cache_tag_probe_fifo": { - "index": 73, - "support": 1, - "type": [ - "parity", - "poison" - ] - }, - "gfx_tcc_latency_fifo": { - "index": 74, - "support": 1, - "type": [ - "parity", - "poison" - ] - }, - "gfx_tcc_latency_fifo_next_ram": { - "index": 75, - "support": 1, - "type": [ - "parity", - "poison" - ] - }, - "gfx_tcc_wrret_tag_write_return": { - "index": 76, - "support": 0, - "type": [ - "parity", - "poison" - ] - }, - "gfx_tcc_atomic_return_buffer": { - "index": 77, - "support": 0, - "type": [ - "parity", - "poison" - ] - }, - "gfx_tci_write_ram": { - "index": 78, - "support": 1, - "type": [ - "parity", - "poison" - ] - }, - "gfx_tcp_cache_ram": { - "index": 79, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_tcp_lfifo_ram": { - "index": 80, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_tcp_cmd_fifo": { - "index": 81, - "support": 1, - "type": [ - "parity", - "poison" - ] - }, - "gfx_tcp_vm_fifo": { - "index": 82, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_tcp_db_ram": { - "index": 83, - "support": 1, - "type": [ - "parity", - "poison" - ] - }, - "gfx_tcp_utcl1_lfifo0": { - "index": 84, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_tcp_utcl1_lfifo1": { - "index": 85, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_td_ss_fifo_lo": { - "index": 86, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_td_ss_fifo_hi": { - "index": 87, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_td_cs_fifo": { - "index": 88, - "support": 1, - "type": [ - "parity", - "poison" - ] - }, - "gfx_ea_dramrd_cmdmem": { - "index": 89, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_ea_dramwr_cmdmem": { - "index": 90, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_ea_dramwr_datamem": { - "index": 91, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_ea_rret_tagmem": { - "index": 92, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_ea_wret_tagmem": { - "index": 93, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_ea_gmird_cmdmem": { - "index": 94, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_ea_gmiwr_cmdmem": { - "index": 95, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_ea_gmiwr_datamem": { - "index": 96, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_ea_dramrd_pagemem": { - "index": 97, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "gfx_ea_dramwr_pagemem": { - "index": 98, - "support": 0, - "type": [ - "parity", - "poison" - ] - }, - "gfx_ea_iord_cmdmem": { - "index": 99, - "support": 0, - "type": [ - "parity", - "poison" - ] - }, - "gfx_ea_iowr_cmdmem": { - "index": 100, - "support": 0, - "type": [ - "parity", - "poison" - ] - }, - "gfx_ea_iowr_datamem": { - "index": 101, - "support": 0, - "type": [ - "parity", - "poison" - ] - }, - "gfx_ea_gmird_pagemem": { - "index": 102, - "support": 0, - "type": [ - "parity", - "poison" - ] - }, - "gfx_ea_gmiwr_pagemem": { - "index": 103, - "support": 0, - "type": [ - "parity", - "poison" - ] - }, - "gfx_ea_mam_d0mem": { - "index": 104, - "support": 0, - "type": [ - "parity", - "poison" - ] - }, - "gfx_ea_mam_d1mem": { - "index": 105, - "support": 0, - "type": [ - "parity", - "poison" - ] - }, - "gfx_ea_mam_d2mem": { - "index": 106, - "support": 0, - "type": [ - "parity", - "poison" - ] - }, - "gfx_ea_mam_d3mem": { - "index": 107, - "support": 0, - "type": [ - "parity", - "poison" - ] - }, - "utc_vml2_bank_cache": { - "index": 108, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "utc_vml2_walker": { - "index": 109, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "utc_atcl2_cache_2m_bank": { - "index": 110, - "support": 0, - "type": [ - "parity", - "poison" - ] - }, - "utc_atcl2_cache_4k_bank": { - "index": 111, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - } - } - }, - "mmhub": { - "index": 3, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "athub": { - "index": 4, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "pcie_bif": { - "index": 5, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "hdp": { - "index": 6, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "xgmi_wafl": { - "index": 7, - "support": 1, - "method" : { - "crc": 2, - "data_parity": 6, - "replay_overflow": 7 - }, - "subblock": { - "xgmi": { - "index": 0, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable" - ] - }, - "wafl": { - "index": 1, - "support": 1, - "type": [ - "single_correctable", - "multi_uncorrectable" - ] - } - } - }, - "df": { - "index": 8, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "smn": { - "index": 9, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "sem": { - "index": 10, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mp0": { - "index": 11, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "mp1": { - "index": 12, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - }, - "fuse": { - "index": 13, - "support": 0, - "type": [ - "single_correctable", - "multi_uncorrectable", - "poison" - ] - } - }, - "tests": [ - { - "name": "ras_umc.0.2", - "block": "umc", - "type": "single_correctable", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "status": "Error count change" - }, - { - "name": "ras_umc.0.4", - "block": "umc", - "type": "multi_uncorrectable", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "status": "GPU Hang" - }, - { - "name": "ras_gfx.0.2", - "block": "gfx", - "subblock": "gfx_cpc_scratch", - "type": "single_correctable", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "status": "Error count change" - }, - { - "name": "ras_gfx.0.4", - "block": "gfx", - "subblock": "gfx_cpc_scratch", - "type": "multi_uncorrectable", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.0.8", - "block": "gfx", - "subblock": "gfx_cpc_scratch", - "type": "poison", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "status": "No GPU hang and no error count change" - }, - { - "name": "ras_gfx.1.2", - "block": "gfx", - "subblock": "gfx_cpc_ucode", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "status": "Error count change" - }, - { - "name": "ras_gfx.1.4", - "block": "gfx", - "subblock": "gfx_cpc_ucode", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.1.8", - "block": "gfx", - "subblock": "gfx_cpc_ucode", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.2.1", - "block": "gfx", - "subblock": "gfx_dc_state_me1", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.2.8", - "block": "gfx", - "subblock": "gfx_dc_state_me1", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "internal": "1", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.3.1", - "block": "gfx", - "subblock": "gfx_dc_csinvoc_me1", - "type": "parity", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "gridX": "4", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.3.8", - "block": "gfx", - "subblock": "gfx_dc_csinvoc_me1", - "type": "poison", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "gridX": "4", - "internal": "1", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.4.1", - "block": "gfx", - "subblock": "gfx_dc_restore_me1", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_dc_restore.bin", - "backend": "kfd", - "queue": "compute", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.4.8", - "block": "gfx", - "subblock": "gfx_dc_restore_me1", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_dc_restore.bin", - "backend": "kfd", - "queue": "compute", - "internal": "1", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.5.1", - "block": "gfx", - "subblock": "gfx_dc_state_me2", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_dc_restore.bin", - "backend": "kfd", - "queue": "compute", - "skip": "1", - "status": "No Support" - }, - { - "name": "ras_gfx.5.8", - "block": "gfx", - "subblock": "gfx_dc_state_me2", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_dc_restore.bin", - "backend": "kfd", - "queue": "compute", - "skip": "1", - "status": "No Support" - }, - { - "name": "ras_gfx.6.1", - "block": "gfx", - "subblock": "gfx_dc_csinvoc_me2", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_dc_restore.bin", - "backend": "kfd", - "queue": "compute", - "skip": "1", - "status": "No Support" - }, - { - "name": "ras_gfx.6.8", - "block": "gfx", - "subblock": "gfx_dc_csinvoc_me2", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_dc_restore.bin", - "backend": "kfd", - "queue": "compute", - "skip": "1", - "status": "No Support" - }, - { - "name": "ras_gfx.7.1", - "block": "gfx", - "subblock": "gfx_dc_restore_me2", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_dc_restore.bin", - "backend": "kfd", - "queue": "compute", - "skip": "1", - "status": "No Support" - }, - { - "name": "ras_gfx.7.8", - "block": "gfx", - "subblock": "gfx_dc_restore_me2", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_dc_restore.bin", - "backend": "kfd", - "queue": "compute", - "skip": "1", - "status": "No Support" - }, - { - "name": "ras_gfx.8.1", - "block": "gfx", - "subblock": "gfx_cpf_roq_me2", - "type": "parity", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.8.8", - "block": "gfx", - "subblock": "gfx_cpf_roq_me2", - "type": "poison", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "internal": "1", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.9.1", - "block": "gfx", - "subblock": "gfx_cpf_roq_me1", - "type": "parity", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.9.8", - "block": "gfx", - "subblock": "gfx_cpf_roq_me1", - "type": "poison", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "internal": "1", - "status": "Error count change" - }, - { - "name": "ras_gfx.10.2", - "block": "gfx", - "subblock": "gfx_cpf_tag", - "type": "single_correctable", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "status": "Error count change" - }, - { - "name": "ras_gfx.10.4", - "block": "gfx", - "subblock": "gfx_cpf_tag", - "type": "multi_uncorrectable", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.11.1", - "block": "gfx", - "subblock": "gfx_cpg_dma_roq", - "type": "parity", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.11.8", - "block": "gfx", - "subblock": "gfx_cpg_dma_roq", - "type": "poison", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "internal": "1", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.12.2", - "block": "gfx", - "subblock": "gfx_cpg_dma_tag", - "type": "single_correctable", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "status": "Error count change" - }, - { - "name": "ras_gfx.12.4", - "block": "gfx", - "subblock": "gfx_cpg_dma_tag", - "type": "multi_uncorrectable", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.12.8", - "block": "gfx", - "subblock": "gfx_cpg_dma_tag", - "type": "poison", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "internal": "1", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.13.2", - "block": "gfx", - "subblock": "gfx_cpg_tag", - "type": "single_correctable", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/bin/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/bin/gc_edc_tc_02.bin", - "status": "Error count change" - }, - { - "name": "ras_gfx.13.4", - "block": "gfx", - "subblock": "gfx_cpg_tag", - "type": "multi_uncorrectable", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.13.8", - "block": "gfx", - "subblock": "gfx_cpg_tag", - "type": "poison", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "internal": "1", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.14.2", - "block": "gfx", - "subblock": "gfx_gds_mem", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_gds_02.bin", - "addrType": "mtype_gds", - "backend": "drm", - "status": "Fail" - }, - { - "name": "ras_gfx.14.4", - "block": "gfx", - "subblock": "gfx_gds_mem", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_gds_02.bin", - "addrType": "mtype_gds", - "backend": "drm", - "status": "Fail" - }, - { - "name": "ras_gfx.14.8", - "block": "gfx", - "subblock": "gfx_gds_mem", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_gds_02.bin", - "addrType": "mtype_gds", - "backend": "drm", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.15.1", - "block": "gfx", - "subblock": "gfx_gds_input_queue", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_gds_02.bin", - "backend": "drm", - "status": "GPU hang without error count change" - }, - { - "name": "ras_gfx.15.8", - "block": "gfx", - "subblock": "gfx_gds_input_queue", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_gds_02.bin", - "backend": "drm", - "status": "GPU hang without error count change" - }, - { - "name": "ras_gfx.16.2", - "block": "gfx", - "subblock": "gfx_gds_oa_phy_cmd_ram_mem", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_gds_oa_phy.bin", - "gdsOaPhy": "1", - "workGroupSizeX": "64", - "workGroupSizeY": "6", - "backend": "drm", - "status": "Error count change" - }, - { - "name": "ras_gfx.16.4", - "block": "gfx", - "subblock": "gfx_gds_oa_phy_cmd_ram_mem", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_gds_oa_phy.bin", - "gdsOaPhy": "1", - "workGroupSizeX": "64", - "workGroupSizeY": "6", - "backend": "drm", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.16.8", - "block": "gfx", - "subblock": "gfx_gds_oa_phy_cmd_ram_mem", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_gds_oa_phy.bin", - "gdsOaPhy": "1", - "workGroupSizeX": "64", - "workGroupSizeY": "6", - "backend": "drm", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.17.1", - "block": "gfx", - "subblock": "gfx_gds_oa_phy_data_ram_mem", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_gds_oa_phy.bin", - "gdsOaPhy": "1", - "workGroupSizeX": "64", - "workGroupSizeY": "6", - "backend": "drm", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.17.8", - "block": "gfx", - "subblock": "gfx_gds_oa_phy_data_ram_mem", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_gds_oa_phy.bin", - "gdsOaPhy": "1", - "workGroupSizeX": "64", - "workGroupSizeY": "6", - "backend": "drm", - "status": "Error count change" - }, - { - "name": "ras_gfx.18.2", - "block": "gfx", - "subblock": "gfx_gds_oa_pipe_mem", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_gds_oa_phy.bin", - "gdsOaPhy": "1", - "workGroupSizeX": "64", - "workGroupSizeY": "6", - "backend": "drm", - "status": "Error count change" - }, - { - "name": "ras_gfx.18.4", - "block": "gfx", - "subblock": "gfx_gds_oa_pipe_mem", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_gds_oa_phy.bin", - "gdsOaPhy": "1", - "workGroupSizeX": "64", - "workGroupSizeY": "6", - "backend": "drm", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.18.8", - "block": "gfx", - "subblock": "gfx_gds_oa_pipe_mem", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_gds_oa_phy.bin", - "gdsOaPhy": "1", - "workGroupSizeX": "64", - "workGroupSizeY": "6", - "backend": "drm", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.19.1", - "block": "gfx", - "subblock": "gfx_spi_sr_mem", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_dc_restore.bin", - "backend": "kfd", - "queue": "compute", - "maxQueueNum": "1", - "internal": "1", - "status": "GPU hange without error count change" - }, - { - "name": "ras_gfx.19.8", - "block": "gfx", - "subblock": "gfx_spi_sr_mem", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_dc_restore.bin", - "backend": "kfd", - "queue": "compute", - "maxQueueNum": "1", - "internal": "1", - "status": "GPU hange without error count change" - }, - { - "name": "ras_gfx.20.2", - "block": "gfx", - "subblock": "gfx_sq_sgpr", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sgpr_02.bin", - "backend": "kfd", - "queue": "compute", - "internal": "1", - "status": "Error count change with self injection, but TA doesn't work" - }, - { - "name": "ras_gfx.20.4", - "block": "gfx", - "subblock": "gfx_sq_sgpr", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sgpr_02.bin", - "backend": "kfd", - "queue": "compute", - "internal": "1", - "status": "GPU hang with error count change with self injection, but TA doesn't work" - }, - { - "name": "ras_gfx.20.8", - "block": "gfx", - "subblock": "gfx_sq_sgpr", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sgpr_02.bin", - "backend": "kfd", - "queue": "compute", - "internal": "1", - "status": "GPU hang with error count change with self injection, but TA doesn't work" - }, - { - "name": "ras_gfx.21.2", - "block": "gfx", - "subblock": "gfx_sq_lds_d", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_lds_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Error count change" - }, - { - "name": "ras_gfx.21.4", - "block": "gfx", - "subblock": "gfx_sq_lds_d", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_lds_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.21.8", - "block": "gfx", - "subblock": "gfx_sq_lds_d", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_lds_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.22.2", - "block": "gfx", - "subblock": "gfx_sq_lds_i", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_lds_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Error count change" - }, - { - "name": "ras_gfx.22.4", - "block": "gfx", - "subblock": "gfx_sq_lds_i", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_lds_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Error count change" - }, - { - "name": "ras_gfx.22.8", - "block": "gfx", - "subblock": "gfx_sq_lds_i", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_lds_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Error count change" - }, - { - "name": "ras_gfx.23.2", - "block": "gfx", - "subblock": "gfx_sq_vgpr", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_vgpr_02.bin", - "backend": "kfd", - "queue": "compute", - "internal": "1", - "status": "Error count change with self injection, but TA doesn't work" - }, - { - "name": "ras_gfx.23.4", - "block": "gfx", - "subblock": "gfx_sq_vgpr", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_vgpr_02.bin", - "backend": "kfd", - "queue": "compute", - "internal": "1", - "status": "Error count change with self injection, but TA doesn't work" - }, - { - "name": "ras_gfx.23.8", - "block": "gfx", - "subblock": "gfx_sq_vgpr", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_vgpr_02.bin", - "backend": "kfd", - "queue": "compute", - "internal": "1", - "status": "Error count change with self injection, but TA doesn't work" - }, - { - "name": "ras_gfx.24.2", - "block": "gfx", - "subblock": "gfx_sqc_inst_utcl1_lfifo", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "status": "Error count change" - }, - { - "name": "ras_gfx.24.4", - "block": "gfx", - "subblock": "gfx_sqc_inst_utcl1_lfifo", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "status": "Error count change" - }, - { - "name": "ras_gfx.24.8", - "block": "gfx", - "subblock": "gfx_sqc_inst_utcl1_lfifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "status": "Error count change" - }, - { - "name": "ras_gfx.25.2", - "block": "gfx", - "subblock": "gfx_sqc_data_cu0_write_data_buf", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "status": "Error count change" - }, - { - "name": "ras_gfx.25.4", - "block": "gfx", - "subblock": "gfx_sqc_data_cu0_write_data_buf", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "status": "Error count change" - }, - { - "name": "ras_gfx.25.8", - "block": "gfx", - "subblock": "gfx_sqc_data_cu0_write_data_buf", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "status": "Error count change" - }, - { - "name": "ras_gfx.26.2", - "block": "gfx", - "subblock": "gfx_sqc_data_cu0_utcl1_lfifo", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sh_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "cuIndex": "0", - "status": "Error count change" - }, - { - "name": "ras_gfx.26.4", - "block": "gfx", - "subblock": "gfx_sqc_data_cu0_utcl1_lfifo", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sh_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "cuIndex": "0", - "status": "Error count change" - }, - { - "name": "ras_gfx.26.8", - "block": "gfx", - "subblock": "gfx_sqc_data_cu0_utcl1_lfifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sh_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "cuIndex": "0", - "status": "Error count change" - }, - { - "name": "ras_gfx.27.2", - "block": "gfx", - "subblock": "gfx_sqc_data_cu1_write_data_buf", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "cuIndex": "1", - "status": "Error count change" - }, - { - "name": "ras_gfx.27.4", - "block": "gfx", - "subblock": "gfx_sqc_data_cu1_write_data_buf", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "cuIndex": "1", - "status": "Error count change" - }, - { - "name": "ras_gfx.27.8", - "block": "gfx", - "subblock": "gfx_sqc_data_cu1_write_data_buf", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "cuIndex": "1", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.28.2", - "block": "gfx", - "subblock": "gfx_sqc_data_cu1_utcl1_lfifo", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sh_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "cuIndex": "1", - "status": "Error count change" - }, - { - "name": "ras_gfx.28.4", - "block": "gfx", - "subblock": "gfx_sqc_data_cu1_utcl1_lfifo", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sh_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "cuIndex": "1", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.28.8", - "block": "gfx", - "subblock": "gfx_sqc_data_cu1_utcl1_lfifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sh_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "cuIndex": "1", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.29.2", - "block": "gfx", - "subblock": "gfx_sqc_data_cu2_write_data_buf", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "cuIndex": "2", - "status": "Error count change" - }, - { - "name": "ras_gfx.29.4", - "block": "gfx", - "subblock": "gfx_sqc_data_cu2_write_data_buf", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "cuIndex": "2", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.29.8", - "block": "gfx", - "subblock": "gfx_sqc_data_cu2_write_data_buf", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "cuIndex": "2", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.30.2", - "block": "gfx", - "subblock": "gfx_sqc_data_cu2_utcl1_lfifo", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sh_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "cuIndex": "2", - "status": "Error count change" - }, - { - "name": "ras_gfx.30.4", - "block": "gfx", - "subblock": "gfx_sqc_data_cu2_utcl1_lfifo", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sh_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "cuIndex": "2", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.30.8", - "block": "gfx", - "subblock": "gfx_sqc_data_cu2_utcl1_lfifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sh_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "cuIndex": "2", - "status": "Error count change" - }, - { - "name": "ras_gfx.31.2", - "block": "gfx", - "subblock": "gfx_sqc_inst_banka_tag_ram", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank.bin", - "backend": "kfd", - "queue": "compute", - "internal": "1", - "status": "Error count change with self injection, but TA doesn't work" - }, - { - "name": "ras_gfx.31.4", - "block": "gfx", - "subblock": "gfx_sqc_inst_banka_tag_ram", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank.bin", - "backend": "kfd", - "queue": "compute", - "internal": "1", - "status": "GPU hang with error count change with self injection, but TA doesn't work" - }, - { - "name": "ras_gfx.31.8", - "block": "gfx", - "subblock": "gfx_sqc_inst_banka_tag_ram", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank.bin", - "backend": "kfd", - "queue": "compute", - "internal": "1", - "status": "GPU hang with error count change with self injection, but TA doesn't work" - }, - { - "name": "ras_gfx.32.1", - "block": "gfx", - "subblock": "gfx_sqc_inst_banka_utcl1_miss_fifo", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "status": "Error count change" - }, - { - "name": "ras_gfx.32.8", - "block": "gfx", - "subblock": "gfx_sqc_inst_banka_utcl1_miss_fifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.33.1", - "block": "gfx", - "subblock": "gfx_sqc_inst_banka_miss_fifo", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank.bin", - "backend": "kfd", - "queue": "compute", - "status": "Error count change" - }, - { - "name": "ras_gfx.33.8", - "block": "gfx", - "subblock": "gfx_sqc_inst_banka_miss_fifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank.bin", - "backend": "kfd", - "queue": "compute", - "status": "Error count change" - }, - { - "name": "ras_gfx.34.2", - "block": "gfx", - "subblock": "gfx_sqc_inst_banka_bank_ram", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "backend": "kfd", - "queue": "compute", - "status": "Error count change" - }, - { - "name": "ras_gfx.34.4", - "block": "gfx", - "subblock": "gfx_sqc_inst_banka_bank_ram", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "backend": "kfd", - "queue": "compute", - "status": "Error count change" - }, - { - "name": "ras_gfx.34.8", - "block": "gfx", - "subblock": "gfx_sqc_inst_banka_bank_ram", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "backend": "kfd", - "queue": "compute", - "status": "Error count change" - }, - { - "name": "ras_gfx.35.2", - "block": "gfx", - "subblock": "gfx_sqc_data_banka_tag_ram", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_data_bank_tag.bin", - "backend": "kfd", - "queue": "compute", - "internal": "1", - "status": "Error count change with self injection, but TA doesn't work" - }, - { - "name": "ras_gfx.35.4", - "block": "gfx", - "subblock": "gfx_sqc_data_banka_tag_ram", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_data_bank_tag.bin", - "backend": "kfd", - "queue": "compute", - "internal": "1", - "status": "GPU hang with error count change with self injection, but TA doesn't work" - }, - { - "name": "ras_gfx.35.8", - "block": "gfx", - "subblock": "gfx_sqc_data_banka_tag_ram", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_data_bank_tag.bin", - "backend": "kfd", - "queue": "compute", - "internal": "1", - "status": "GPU hang with error count change with self injection, but TA doesn't work" - }, - { - "name": "ras_gfx.36.1", - "block": "gfx", - "subblock": "gfx_sqc_data_banka_hit_fifo", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_data_bank_tag.bin", - "backend": "kfd", - "queue": "compute", - "status": "Error count change" - }, - { - "name": "ras_gfx.36.8", - "block": "gfx", - "subblock": "gfx_sqc_data_banka_hit_fifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_data_bank_tag.bin", - "backend": "kfd", - "queue": "compute", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.37.1", - "block": "gfx", - "subblock": "gfx_sqc_data_banka_miss_fifo", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "status": "Error count change" - }, - { - "name": "ras_gfx.37.8", - "block": "gfx", - "subblock": "gfx_sqc_data_banka_miss_fifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "status": "Error count change" - }, - { - "name": "ras_gfx.38.1", - "block": "gfx", - "subblock": "gfx_sqc_data_banka_dirty_bit_ram", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_dirty_bit.bin", - "backend": "kfd", - "queue": "compute", - "maxQueueNum": "1", - "addrType": "mtype_uc_sys", - "skip": "1", - "status": "No GPU hang and error counter change, Diag also fail" - }, - { - "name": "ras_gfx.38.8", - "block": "gfx", - "subblock": "gfx_sqc_data_banka_dirty_bit_ram", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_dirty_bit.bin", - "backend": "kfd", - "queue": "compute", - "maxQueueNum": "1", - "addrType": "mtype_uc_sys", - "skip": "1", - "status": "No GPU hang and error counter change, Diag also fail" - }, - { - "name": "ras_gfx.39.2", - "block": "gfx", - "subblock": "gfx_sqc_data_banka_bank_ram", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "status": "Error count change" - }, - { - "name": "ras_gfx.39.4", - "block": "gfx", - "subblock": "gfx_sqc_data_banka_bank_ram", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.39.8", - "block": "gfx", - "subblock": "gfx_sqc_data_banka_bank_ram", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.40.2", - "block": "gfx", - "subblock": "gfx_sqc_inst_bankb_tag_ram", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank.bin", - "backend": "kfd", - "queue": "compute", - "internal": "1", - "status": "Error count change with self injection, but TA doesn't work" - }, - { - "name": "ras_gfx.40.4", - "block": "gfx", - "subblock": "gfx_sqc_inst_bankb_tag_ram", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank.bin", - "backend": "kfd", - "queue": "compute", - "internal": "1", - "status": "Error count change with self injection, but TA doesn't work" - }, - { - "name": "ras_gfx.40.8", - "block": "gfx", - "subblock": "gfx_sqc_inst_bankb_tag_ram", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank.bin", - "backend": "kfd", - "queue": "compute", - "internal": "1", - "status": "Error count change with self injection, but TA doesn't work" - }, - { - "name": "ras_gfx.41.1", - "block": "gfx", - "subblock": "gfx_sqc_inst_bankb_utcl1_miss_fifo", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "status": "Error count change" - }, - { - "name": "ras_gfx.41.8", - "block": "gfx", - "subblock": "gfx_sqc_inst_bankb_utcl1_miss_fifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "status": "Error count change" - }, - { - "name": "ras_gfx.42.1", - "block": "gfx", - "subblock": "gfx_sqc_inst_bankb_miss_fifo", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank.bin", - "backend": "kfd", - "queue": "compute", - "status": "Error count change" - }, - { - "name": "ras_gfx.42.8", - "block": "gfx", - "subblock": "gfx_sqc_inst_bankb_miss_fifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank.bin", - "backend": "kfd", - "queue": "compute", - "status": "Error count change" - }, - { - "name": "ras_gfx.43.2", - "block": "gfx", - "subblock": "gfx_sqc_inst_bankb_bank_ram", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "backend": "kfd", - "queue": "compute", - "status": "Error count change" - }, - { - "name": "ras_gfx.43.4", - "block": "gfx", - "subblock": "gfx_sqc_inst_bankb_bank_ram", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "backend": "kfd", - "queue": "compute", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.43.8", - "block": "gfx", - "subblock": "gfx_sqc_inst_bankb_bank_ram", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "backend": "kfd", - "queue": "compute", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.44.2", - "block": "gfx", - "subblock": "gfx_sqc_data_bankb_tag_ram", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_data_bank_tag.bin", - "backend": "kfd", - "queue": "compute", - "internal": "1", - "status": "Error count change with self injection, but TA doesn't work" - }, - { - "name": "ras_gfx.44.4", - "block": "gfx", - "subblock": "gfx_sqc_data_bankb_tag_ram", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_data_bank_tag.bin", - "backend": "kfd", - "queue": "compute", - "internal": "1", - "status": "GPU hang with error count change with self injection, but TA doesn't work" - }, - { - "name": "ras_gfx.44.8", - "block": "gfx", - "subblock": "gfx_sqc_data_bankb_tag_ram", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_data_bank_tag.bin", - "backend": "kfd", - "queue": "compute", - "internal": "1", - "status": "GPU hang with error count change with self injection, but TA doesn't work" - }, - { - "name": "ras_gfx.45.1", - "block": "gfx", - "subblock": "gfx_sqc_data_bankb_hit_fifo", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_data_bank_tag.bin", - "backend": "kfd", - "queue": "compute", - "status": "Error count change" - }, - { - "name": "ras_gfx.45.8", - "block": "gfx", - "subblock": "gfx_sqc_data_bankb_hit_fifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_data_bank_tag.bin", - "backend": "kfd", - "queue": "compute", - "status": "Error count change" - }, - { - "name": "ras_gfx.46.1", - "block": "gfx", - "subblock": "gfx_sqc_data_bankb_miss_fifo", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "status": "Error count change" - }, - { - "name": "ras_gfx.46.8", - "block": "gfx", - "subblock": "gfx_sqc_data_bankb_miss_fifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "status": "Error count change" - }, - { - "name": "ras_gfx.47.1", - "block": "gfx", - "subblock": "gfx_sqc_data_bankb_dirty_bit_ram", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_dirty_bit.bin", - "backend": "kfd", - "queue": "compute", - "maxQueueNum": "1", - "addrType": "mtype_uc_sys", - "skip": "1", - "status": "No GPU hang and error counter change, Diag also fail" - }, - { - "name": "ras_gfx.47.8", - "block": "gfx", - "subblock": "gfx_sqc_data_bankb_dirty_bit_ram", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_dirty_bit.bin", - "backend": "kfd", - "queue": "compute", - "maxQueueNum": "1", - "addrType": "mtype_uc_sys", - "skip": "1", - "status": "No GPU hang and error counter change, Diag also fail" - }, - { - "name": "ras_gfx.48.2", - "block": "gfx", - "subblock": "gfx_sqc_data_bankb_bank_ram", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "status": "Error count change" - }, - { - "name": "ras_gfx.48.4", - "block": "gfx", - "subblock": "gfx_sqc_data_bankb_bank_ram", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "status": "Error count change" - }, - { - "name": "ras_gfx.48.8", - "block": "gfx", - "subblock": "gfx_sqc_data_bankb_bank_ram", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "status": "Error count change" - }, - { - "name": "ras_gfx.49.2", - "block": "gfx", - "subblock": "gfx_ta_fs_dfifo", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Error count change" - }, - { - "name": "ras_gfx.49.4", - "block": "gfx", - "subblock": "gfx_ta_fs_dfifo", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Error count change" - }, - { - "name": "ras_gfx.49.8", - "block": "gfx", - "subblock": "gfx_ta_fs_dfifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Error count change" - }, - { - "name": "ras_gfx.50.1", - "block": "gfx", - "subblock": "gfx_ta_fs_afifo", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.50.8", - "block": "gfx", - "subblock": "gfx_ta_fs_afifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.51.1", - "block": "gfx", - "subblock": "gfx_ta_fl_lfifo", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.51.8", - "block": "gfx", - "subblock": "gfx_ta_fl_lfifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.52.1", - "block": "gfx", - "subblock": "gfx_ta_fx_lfifo", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.52.8", - "block": "gfx", - "subblock": "gfx_ta_fx_lfifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.53.1", - "block": "gfx", - "subblock": "gfx_ta_fs_cfifo", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.53.8", - "block": "gfx", - "subblock": "gfx_ta_fs_cfifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.54.1", - "block": "gfx", - "subblock": "gfx_tca_hole_fifo", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.54.8", - "block": "gfx", - "subblock": "gfx_tca_hole_fifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.55.1", - "block": "gfx", - "subblock": "gfx_tca_req_fifo", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "maxQueueNum": "20", - "workGroupSizeX": "256", - "gridX": "16", - "internal": "1", - "COMPUTE_STATIC_THREAD_MGMT_SE0_sh0_cu_en": "255", - "status": "Fail" - }, - { - "name": "ras_gfx.55.8", - "block": "gfx", - "subblock": "gfx_tca_req_fifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "maxQueueNum": "20", - "workGroupSizeX": "256", - "gridX": "16", - "internal": "1", - "COMPUTE_STATIC_THREAD_MGMT_SE0_sh0_cu_en": "255", - "status": "Fail" - }, - { - "name": "ras_gfx.56.2", - "block": "gfx", - "subblock": "gfx_tcc_cache_data", - "type": "single_correctable", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Error count change" - }, - { - "name": "ras_gfx.56.4", - "block": "gfx", - "subblock": "gfx_tcc_cache_data", - "type": "multi_uncorrectable", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Error count change" - }, - { - "name": "ras_gfx.56.8", - "block": "gfx", - "subblock": "gfx_tcc_cache_data", - "type": "poison", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Error count change" - }, - { - "name": "ras_gfx.57.2", - "block": "gfx", - "subblock": "gfx_tcc_cache_data_bank_0_1", - "type": "single_correctable", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Error count change" - }, - { - "name": "ras_gfx.57.4", - "block": "gfx", - "subblock": "gfx_tcc_cache_data_bank_0_1", - "type": "multi_uncorrectable", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Error count change" - }, - { - "name": "ras_gfx.57.8", - "block": "gfx", - "subblock": "gfx_tcc_cache_data_bank_0_1", - "type": "poison", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Error count change" - }, - { - "name": "ras_gfx.58.2", - "block": "gfx", - "subblock": "gfx_tcc_cache_data_bank_1_0", - "type": "single_correctable", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Error count change" - }, - { - "name": "ras_gfx.58.4", - "block": "gfx", - "subblock": "gfx_tcc_cache_data_bank_1_0", - "type": "multi_uncorrectable", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Error count change" - }, - { - "name": "ras_gfx.58.8", - "block": "gfx", - "subblock": "gfx_tcc_cache_data_bank_1_0", - "type": "poison", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Error count change" - }, - { - "name": "ras_gfx.59.2", - "block": "gfx", - "subblock": "gfx_tcc_cache_data_bank_1_1", - "type": "single_correctable", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Error count change" - }, - { - "name": "ras_gfx.59.4", - "block": "gfx", - "subblock": "gfx_tcc_cache_data_bank_1_1", - "type": "multi_uncorrectable", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Error count change" - }, - { - "name": "ras_gfx.59.8", - "block": "gfx", - "subblock": "gfx_tcc_cache_data_bank_1_1", - "type": "poison", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Error count change" - }, - { - "name": "ras_gfx.60.2", - "block": "gfx", - "subblock": "gfx_tcc_cache_dirty_bank_0", - "type": "single_correctable", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "internal": "1", - "status": "Error count change with internal injection" - }, - { - "name": "ras_gfx.60.4", - "block": "gfx", - "subblock": "gfx_tcc_cache_dirty_bank_0", - "type": "multi_uncorrectable", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "internal": "1", - "status": "Error count change with internal injection" - }, - { - "name": "ras_gfx.60.8", - "block": "gfx", - "subblock": "gfx_tcc_cache_dirty_bank_0", - "type": "poison", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "internal": "1", - "status": "Error count change with internal injection" - }, - { - "name": "ras_gfx.61.2", - "block": "gfx", - "subblock": "gfx_tcc_cache_dirty_bank_1", - "type": "single_correctable", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "internal": "1", - "status": "Error count change with internal injection" - }, - { - "name": "ras_gfx.61.4", - "block": "gfx", - "subblock": "gfx_tcc_cache_dirty_bank_1", - "type": "multi_uncorrectable", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "internal": "1", - "status": "Error count change with internal injection" - }, - { - "name": "ras_gfx.61.8", - "block": "gfx", - "subblock": "gfx_tcc_cache_dirty_bank_1", - "type": "poison", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "internal": "1", - "status": "Error count change with internal injection" - }, - { - "name": "ras_gfx.62.2", - "block": "gfx", - "subblock": "gfx_tcc_high_rate_tag", - "type": "single_correctable", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Error count change" - }, - { - "name": "ras_gfx.62.4", - "block": "gfx", - "subblock": "gfx_tcc_high_rate_tag", - "type": "multi_uncorrectable", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Error count change" - }, - { - "name": "ras_gfx.62.8", - "block": "gfx", - "subblock": "gfx_tcc_high_rate_tag", - "type": "poison", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Error count change" - }, - { - "name": "ras_gfx.63.2", - "block": "gfx", - "subblock": "gfx_tcc_low_rate_tag", - "type": "single_correctable", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Error count change" - }, - { - "name": "ras_gfx.63.4", - "block": "gfx", - "subblock": "gfx_tcc_low_rate_tag", - "type": "multi_uncorrectable", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Error count change" - }, - { - "name": "ras_gfx.63.8", - "block": "gfx", - "subblock": "gfx_tcc_low_rate_tag", - "type": "poison", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "Error count change" - }, - { - "name": "ras_gfx.64.1", - "block": "gfx", - "subblock": "gfx_tcc_in_use_dec", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.64.8", - "block": "gfx", - "subblock": "gfx_tcc_in_use_dec", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.65.1", - "block": "gfx", - "subblock": "gfx_tcc_in_use_transfer", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.65.8", - "block": "gfx", - "subblock": "gfx_tcc_in_use_transfer", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.66.1", - "block": "gfx", - "subblock": "gfx_tcc_return_data", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.66.8", - "block": "gfx", - "subblock": "gfx_tcc_return_data", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.67.1", - "block": "gfx", - "subblock": "gfx_tcc_return_control", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_atomic_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "64", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.67.8", - "block": "gfx", - "subblock": "gfx_tcc_return_control", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_atomic_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "64", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.68.1", - "block": "gfx", - "subblock": "gfx_tcc_uc_atomic_fifo", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "internal": "1", - "status": "GPU hang with error count change, with internal injection" - }, - { - "name": "ras_gfx.68.8", - "block": "gfx", - "subblock": "gfx_tcc_uc_atomic_fifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "internal": "1", - "status": "GPU hang with error count change, with internal injection" - }, - { - "name": "ras_gfx.69.1", - "block": "gfx", - "subblock": "gfx_tcc_write_return", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "internal": "1", - "status": "GPU hang with error count change, with internal injection" - }, - { - "name": "ras_gfx.69.8", - "block": "gfx", - "subblock": "gfx_tcc_write_return", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "internal": "1", - "status": "GPU hang with error count change, with internal injection" - }, - { - "name": "ras_gfx.70.1", - "block": "gfx", - "subblock": "gfx_tcc_write_cache_read", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "internal": "1", - "status": "GPU hang with error count change, with internal injection" - }, - { - "name": "ras_gfx.70.8", - "block": "gfx", - "subblock": "gfx_tcc_write_cache_read", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "internal": "1", - "status": "GPU hang with error count change, with internal injection" - }, - { - "name": "ras_gfx.71.2", - "block": "gfx", - "subblock": "gfx_tcc_src_fifo", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "internal": "1", - "status": "Error count change with internal injection" - }, - { - "name": "ras_gfx.71.4", - "block": "gfx", - "subblock": "gfx_tcc_src_fifo", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "internal": "1", - "status": "Error count change with internal injection" - }, - { - "name": "ras_gfx.71.8", - "block": "gfx", - "subblock": "gfx_tcc_src_fifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "internal": "1", - "status": "Error count change with internal injection" - }, - { - "name": "ras_gfx.72.1", - "block": "gfx", - "subblock": "gfx_tcc_src_fifo_next_ram", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "5", - "internal": "1", - "status": "Error count change with internal injection, trafficed by glMark2" - }, - { - "name": "ras_gfx.72.8", - "block": "gfx", - "subblock": "gfx_tcc_src_fifo_next_ram", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "5", - "internal": "1", - "status": "Error count change with internal injection, trafficed by glMark2" - }, - { - "name": "ras_gfx.73.1", - "block": "gfx", - "subblock": "gfx_tcc_cache_tag_probe_fifo", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "maxQueueNum": "16", - "workGroupSizeX": "64", - "gridX": "4", - "internal": "0", - "addrType": "fb", - "loop": "16", - "status": "Fail with Diag, no error counter change" - }, - { - "name": "ras_gfx.73.8", - "block": "gfx", - "subblock": "gfx_tcc_cache_tag_probe_fifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "maxQueueNum": "16", - "workGroupSizeX": "64", - "gridX": "4", - "internal": "0", - "addrType": "fb", - "loop": "16", - "status": "Fail with Diag, no error counter change" - }, - { - "name": "ras_gfx.74.1", - "block": "gfx", - "subblock": "gfx_tcc_latency_fifo", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "5", - "loop": "128", - "status": "Error count change" - }, - { - "name": "ras_gfx.74.8", - "block": "gfx", - "subblock": "gfx_tcc_latency_fifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "5", - "loop": "128", - "status": "Error count change" - }, - { - "name": "ras_gfx.75.1", - "block": "gfx", - "subblock": "gfx_tcc_latency_fifo_next_ram", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "maxQueueNum": "1", - "workGroupSizeX": "64", - "gridX": "4", - "internal": "1", - "addrType": "fb", - "loop": "16", - "status": "GPU hang without error count change, TA inject failed, should check with FW team" - }, - { - "name": "ras_gfx.75.8", - "block": "gfx", - "subblock": "gfx_tcc_latency_fifo_next_ram", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "maxQueueNum": "1", - "workGroupSizeX": "64", - "gridX": "4", - "internal": "1", - "addrType": "fb", - "loop": "16", - "status": "GPU hang without error count change, TA inject failed, should check with FW team" - }, - { - "name": "ras_gfx.76.1", - "block": "gfx", - "subblock": "gfx_tcc_wrret_tag_write_return", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "maxQueueNum": "16", - "workGroupSizeX": "256", - "gridX": "64", - "internal": "1", - "addrType": "fb", - "loop": "2096", - "skip": "1", - "status": "Fail" - }, - { - "name": "ras_gfx.76.8", - "block": "gfx", - "subblock": "gfx_tcc_wrret_tag_write_return", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "maxQueueNum": "16", - "workGroupSizeX": "256", - "gridX": "64", - "internal": "1", - "addrType": "fb", - "loop": "2096", - "skip": "1", - "status": "Fail" - }, - { - "name": "ras_gfx.77.1", - "block": "gfx", - "subblock": "gfx_tcc_atomic_return_buffer", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "maxQueueNum": "16", - "workGroupSizeX": "256", - "gridX": "64", - "internal": "1", - "addrType": "fb", - "loop": "2096", - "skip": "1", - "status": "Fail" - }, - { - "name": "ras_gfx.77.8", - "block": "gfx", - "subblock": "gfx_tcc_atomic_return_buffer", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "maxQueueNum": "16", - "workGroupSizeX": "256", - "gridX": "64", - "internal": "1", - "addrType": "fb", - "loop": "2096", - "skip": "1", - "status": "Fail" - }, - { - "name": "ras_gfx.78.1", - "block": "gfx", - "subblock": "gfx_tci_write_ram", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "internal": "1", - "status": "GPU hang without error count change, TA inject failed, should check with FW team" - }, - { - "name": "ras_gfx.78.8", - "block": "gfx", - "subblock": "gfx_tci_write_ram", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_data_bank.bin", - "backend": "kfd", - "queue": "compute", - "internal": "1", - "status": "GPU hang without error count change, TA inject failed, should check with FW team" - }, - { - "name": "ras_gfx.79.2", - "block": "gfx", - "subblock": "gfx_tcp_cache_ram", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "internal": "0", - "status": "Error count change with glmark2" - }, - { - "name": "ras_gfx.79.4", - "block": "gfx", - "subblock": "gfx_tcp_cache_ram", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "internal": "0", - "status": "Error count change with glmark2" - }, - { - "name": "ras_gfx.79.8", - "block": "gfx", - "subblock": "gfx_tcp_cache_ram", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "internal": "0", - "status": "Error count change with glmark2" - }, - { - "name": "ras_gfx.80.2", - "block": "gfx", - "subblock": "gfx_tcp_lfifo_ram", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "internal": "0", - "status": "Error count change with glmark2" - }, - { - "name": "ras_gfx.80.4", - "block": "gfx", - "subblock": "gfx_tcp_lfifo_ram", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "internal": "0", - "status": "Error count change with glmark2" - }, - { - "name": "ras_gfx.80.8", - "block": "gfx", - "subblock": "gfx_tcp_lfifo_ram", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "internal": "0", - "status": "Error count change with glmark2" - }, - { - "name": "ras_gfx.81.1", - "block": "gfx", - "subblock": "gfx_tcp_cmd_fifo", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "internal": "0", - "status": "Error count change" - }, - { - "name": "ras_gfx.81.8", - "block": "gfx", - "subblock": "gfx_tcp_cmd_fifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "internal": "0", - "status": "Error count change" - }, - { - "name": "ras_gfx.82.2", - "block": "gfx", - "subblock": "gfx_tcp_vm_fifo", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "internal": "0", - "status": "Error count change" - }, - { - "name": "ras_gfx.82.4", - "block": "gfx", - "subblock": "gfx_tcp_vm_fifo", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "internal": "0", - "status": "Error count change" - }, - { - "name": "ras_gfx.82.8", - "block": "gfx", - "subblock": "gfx_tcp_vm_fifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "internal": "0", - "status": "Error count change" - }, - { - "name": "ras_gfx.83.1", - "block": "gfx", - "subblock": "gfx_tcp_db_ram", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "internal": "1", - "status": "GPU hang with error count change, with glmark2" - }, - { - "name": "ras_gfx.83.8", - "block": "gfx", - "subblock": "gfx_tcp_db_ram", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "internal": "1", - "status": "GPU hang with error count change, with glmark2" - }, - { - "name": "ras_gfx.84.2", - "block": "gfx", - "subblock": "gfx_tcp_utcl1_lfifo0", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tcp_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "internal": "0", - "status": "Error count change, but next injection will fail" - }, - { - "name": "ras_gfx.84.4", - "block": "gfx", - "subblock": "gfx_tcp_utcl1_lfifo0", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tcp_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "internal": "0", - "status": "Error count change, but next injection will fail" - }, - { - "name": "ras_gfx.84.8", - "block": "gfx", - "subblock": "gfx_tcp_utcl1_lfifo0", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tcp_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "internal": "0", - "status": "Error count change, but next injection will fail" - }, - { - "name": "ras_gfx.85.2", - "block": "gfx", - "subblock": "gfx_tcp_utcl1_lfifo1", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tcp_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "internal": "0", - "status": "Error count change, but next injection will fail" - }, - { - "name": "ras_gfx.85.4", - "block": "gfx", - "subblock": "gfx_tcp_utcl1_lfifo1", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tcp_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "internal": "0", - "status": "Error count change, but next injection will fail" - }, - { - "name": "ras_gfx.85.8", - "block": "gfx", - "subblock": "gfx_tcp_utcl1_lfifo1", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tcp_atcl1.bin", - "backend": "kfd", - "queue": "compute", - "internal": "0", - "status": "Error count change, but next injection will fail" - }, - { - "name": "ras_gfx.86.2", - "block": "gfx", - "subblock": "gfx_td_ss_fifo_lo", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "loop": "10", - "internal": "0", - "status": "Error count change" - }, - { - "name": "ras_gfx.86.4", - "block": "gfx", - "subblock": "gfx_td_ss_fifo_lo", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "loop": "10", - "internal": "0", - "status": "Error count change" - }, - { - "name": "ras_gfx.86.8", - "block": "gfx", - "subblock": "gfx_td_ss_fifo_lo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "loop": "10", - "internal": "0", - "status": "Error count change" - }, - { - "name": "ras_gfx.87.2", - "block": "gfx", - "subblock": "gfx_td_ss_fifo_hi", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "loop": "10", - "internal": "0", - "status": "Error count change" - }, - { - "name": "ras_gfx.87.4", - "block": "gfx", - "subblock": "gfx_td_ss_fifo_hi", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "loop": "10", - "internal": "0", - "status": "Error count change" - }, - { - "name": "ras_gfx.87.8", - "block": "gfx", - "subblock": "gfx_td_ss_fifo_hi", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "loop": "10", - "internal": "0", - "status": "Error count change" - }, - { - "name": "ras_gfx.88.1", - "block": "gfx", - "subblock": "gfx_td_cs_fifo", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "loop": "10", - "internal": "0", - "status": "Error count change" - }, - { - "name": "ras_gfx.88.8", - "block": "gfx", - "subblock": "gfx_td_cs_fifo", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "loop": "10", - "internal": "0", - "status": "Error count change" - }, - { - "name": "ras_gfx.89.2", - "block": "gfx", - "subblock": "gfx_ea_dramrd_cmdmem", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_ea.bin", - "backend": "kfd", - "queue": "compute", - "internal": "0", - "status": "Error count change, but next injection will fail" - }, - { - "name": "ras_gfx.89.4", - "block": "gfx", - "subblock": "gfx_ea_dramrd_cmdmem", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_ea.bin", - "backend": "kfd", - "queue": "compute", - "internal": "0", - "status": "Error count change, but next injection will fail" - }, - { - "name": "ras_gfx.89.8", - "block": "gfx", - "subblock": "gfx_ea_dramrd_cmdmem", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_ea.bin", - "backend": "kfd", - "queue": "compute", - "internal": "0", - "status": "Error count change, but next injection will fail" - }, - { - "name": "ras_gfx.90.2", - "block": "gfx", - "subblock": "gfx_ea_dramwr_cmdmem", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_ea_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "4", - "internal": "1", - "status": "Error count change" - }, - { - "name": "ras_gfx.90.4", - "block": "gfx", - "subblock": "gfx_ea_dramwr_cmdmem", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_ea_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "4", - "internal": "1", - "status": "Error count change" - }, - { - "name": "ras_gfx.90.8", - "block": "gfx", - "subblock": "gfx_ea_dramwr_cmdmem", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_ea_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "4", - "internal": "1", - "status": "Error count change" - }, - { - "name": "ras_gfx.91.2", - "block": "gfx", - "subblock": "gfx_ea_dramwr_datamem", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "4", - "internal": "1", - "status": "Error count change" - }, - { - "name": "ras_gfx.91.4", - "block": "gfx", - "subblock": "gfx_ea_dramwr_datamem", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "4", - "internal": "1", - "status": "Error count change" - }, - { - "name": "ras_gfx.91.8", - "block": "gfx", - "subblock": "gfx_ea_dramwr_datamem", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "4", - "internal": "1", - "status": "Error count change" - }, - { - "name": "ras_gfx.92.2", - "block": "gfx", - "subblock": "gfx_ea_rret_tagmem", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "4", - "internal": "0", - "status": "Error count change, but next injection will fail" - }, - { - "name": "ras_gfx.92.4", - "block": "gfx", - "subblock": "gfx_ea_rret_tagmem", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "4", - "internal": "0", - "status": "Error count change, but next injection will fail" - }, - { - "name": "ras_gfx.92.8", - "block": "gfx", - "subblock": "gfx_ea_rret_tagmem", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "4", - "internal": "0", - "status": "Error count change, but next injection will fail" - }, - { - "name": "ras_gfx.93.2", - "block": "gfx", - "subblock": "gfx_ea_wret_tagmem", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "4", - "internal": "1", - "status": "Error count change" - }, - { - "name": "ras_gfx.93.4", - "block": "gfx", - "subblock": "gfx_ea_wret_tagmem", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "4", - "internal": "1", - "status": "Error count change" - }, - { - "name": "ras_gfx.93.8", - "block": "gfx", - "subblock": "gfx_ea_wret_tagmem", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "4", - "internal": "1", - "status": "Error count change" - }, - { - "name": "ras_gfx.94.2", - "block": "gfx", - "subblock": "gfx_ea_gmird_cmdmem", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "256", - "gridX": "64", - "addrType": "sys", - "status": "Faild" - }, - { - "name": "ras_gfx.94.4", - "block": "gfx", - "subblock": "gfx_ea_gmird_cmdmem", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "256", - "gridX": "64", - "addrType": "sys", - "status": "Faild" - }, - { - "name": "ras_gfx.94.8", - "block": "gfx", - "subblock": "gfx_ea_gmird_cmdmem", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "256", - "gridX": "64", - "addrType": "sys", - "status": "Faild" - }, - { - "name": "ras_gfx.95.2", - "block": "gfx", - "subblock": "gfx_ea_gmiwr_cmdmem", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02_ea.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "256", - "gridX": "64", - "addrType": "sys", - "status": "Faild" - }, - { - "name": "ras_gfx.95.4", - "block": "gfx", - "subblock": "gfx_ea_gmiwr_cmdmem", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02_ea.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "256", - "gridX": "64", - "addrType": "sys", - "status": "Faild" - }, - { - "name": "ras_gfx.95.8", - "block": "gfx", - "subblock": "gfx_ea_gmiwr_cmdmem", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02_ea.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "256", - "gridX": "64", - "addrType": "sys", - "status": "Faild" - }, - { - "name": "ras_gfx.96.2", - "block": "gfx", - "subblock": "gfx_ea_gmiwr_datamem", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "256", - "gridX": "64", - "addrType": "sys", - "status": "Faild" - }, - { - "name": "ras_gfx.96.4", - "block": "gfx", - "subblock": "gfx_ea_gmiwr_datamem", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "256", - "gridX": "64", - "addrType": "sys", - "status": "Faild" - }, - { - "name": "ras_gfx.96.8", - "block": "gfx", - "subblock": "gfx_ea_gmiwr_datamem", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "256", - "gridX": "64", - "addrType": "sys", - "status": "Faild" - }, - { - "name": "ras_gfx.97.1", - "block": "gfx", - "subblock": "gfx_ea_dramrd_pagemem", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_ea_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "256", - "gridX": "5", - "addrType": "mtype_uc_vid", - "status": "Faild" - }, - { - "name": "ras_gfx.97.8", - "block": "gfx", - "subblock": "gfx_ea_dramrd_pagemem", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_ea_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "256", - "gridX": "5", - "addrType": "mtype_uc_vid", - "status": "Faild" - }, - { - "name": "ras_gfx.98.1", - "block": "gfx", - "subblock": "gfx_ea_dramwr_pagemem", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_ea_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Faild" - }, - { - "name": "ras_gfx.98.8", - "block": "gfx", - "subblock": "gfx_ea_dramwr_pagemem", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_ea_02.bin", - "backend": "kfd", - "queue": "compute", - "status": "Faild" - }, - { - "name": "ras_gfx.99.1", - "block": "gfx", - "subblock": "gfx_ea_iord_cmdmem", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "4", - "addrType": "sys", - "status": "Faild" - }, - { - "name": "ras_gfx.99.8", - "block": "gfx", - "subblock": "gfx_ea_iord_cmdmem", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "4", - "addrType": "sys", - "status": "Faild" - }, - { - "name": "ras_gfx.100.1", - "block": "gfx", - "subblock": "gfx_ea_iowr_cmdmem", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "4", - "addrType": "sys", - "status": "Faild" - }, - { - "name": "ras_gfx.100.8", - "block": "gfx", - "subblock": "gfx_ea_iowr_cmdmem", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "4", - "addrType": "sys", - "status": "Faild" - }, - { - "name": "ras_gfx.101.1", - "block": "gfx", - "subblock": "gfx_ea_iowr_datamem", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "4", - "addrType": "sys", - "status": "Faild" - }, - { - "name": "ras_gfx.101.8", - "block": "gfx", - "subblock": "gfx_ea_iowr_datamem", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "4", - "addrType": "sys", - "status": "Faild" - }, - { - "name": "ras_gfx.102.1", - "block": "gfx", - "subblock": "gfx_ea_gmird_pagemem", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "256", - "gridX": "2", - "addrType": "sys", - "status": "Faild" - }, - { - "name": "ras_gfx.102.8", - "block": "gfx", - "subblock": "gfx_ea_gmird_pagemem", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "256", - "gridX": "2", - "addrType": "sys", - "status": "Faild" - }, - { - "name": "ras_gfx.103.1", - "block": "gfx", - "subblock": "gfx_ea_gmiwr_pagemem", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "256", - "gridX": "8", - "addrType": "sys", - "status": "Faild" - }, - { - "name": "ras_gfx.103.8", - "block": "gfx", - "subblock": "gfx_ea_gmiwr_pagemem", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "256", - "gridX": "8", - "addrType": "sys", - "status": "Faild" - }, - { - "name": "ras_gfx.104.1", - "block": "gfx", - "subblock": "gfx_ea_mam_d0mem", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_ea_dirty.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "addrType": "fb", - "internal": "0", - "status": "Failed" - }, - { - "name": "ras_gfx.104.8", - "block": "gfx", - "subblock": "gfx_ea_mam_d0mem", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_ea_dirty.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "addrType": "fb", - "internal": "0", - "status": "Failed" - }, - { - "name": "ras_gfx.105.1", - "block": "gfx", - "subblock": "gfx_ea_mam_d1mem", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_ea_dirty.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "addrType": "fb", - "internal": "0", - "status": "Failed" - }, - { - "name": "ras_gfx.105.8", - "block": "gfx", - "subblock": "gfx_ea_mam_d1mem", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_ea_dirty.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "addrType": "fb", - "internal": "0", - "status": "Failed" - }, - { - "name": "ras_gfx.106.1", - "block": "gfx", - "subblock": "gfx_ea_mam_d2mem", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_ea_dirty.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "addrType": "fb", - "internal": "0", - "status": "Failed" - }, - { - "name": "ras_gfx.106.8", - "block": "gfx", - "subblock": "gfx_ea_mam_d2mem", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_ea_dirty.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "addrType": "fb", - "internal": "0", - "status": "Failed" - }, - { - "name": "ras_gfx.107.1", - "block": "gfx", - "subblock": "gfx_ea_mam_d3mem", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_ea_dirty.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "addrType": "fb", - "internal": "0", - "status": "Failed" - }, - { - "name": "ras_gfx.107.8", - "block": "gfx", - "subblock": "gfx_ea_mam_d3mem", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_ea_dirty.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "64", - "gridX": "4", - "addrType": "fb", - "internal": "0", - "status": "Failed" - }, - { - "name": "ras_gfx.108.2", - "block": "gfx", - "subblock": "utc_vml2_bank_cache", - "type": "single_correctable", - "backend": "kfd", - "queue": "compute", - "internal": "0", - "status": "Error count change" - }, - { - "name": "ras_gfx.108.4", - "block": "gfx", - "subblock": "utc_vml2_bank_cache", - "type": "multi_uncorrectable", - "backend": "kfd", - "queue": "compute", - "internal": "0", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.108.8", - "block": "gfx", - "subblock": "utc_vml2_bank_cache", - "type": "poison", - "backend": "kfd", - "queue": "compute", - "internal": "0", - "status": "GPU hang with error count change" - }, - { - "name": "ras_gfx.109.2", - "block": "gfx", - "subblock": "utc_vml2_walker", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_utc_vmwalker_pde_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "1", - "internal": "0", - "loop": "4", - "status": "Error count change" - }, - { - "name": "ras_gfx.109.4", - "block": "gfx", - "subblock": "utc_vml2_walker", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_utc_vmwalker_pde_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "1", - "internal": "0", - "loop": "4", - "status": "Error count change" - }, - { - "name": "ras_gfx.109.8", - "block": "gfx", - "subblock": "utc_vml2_walker", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_utc_vmwalker_pde_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "512", - "gridX": "1", - "internal": "0", - "loop": "4", - "status": "Error count change" - }, - { - "name": "ras_gfx.110.1", - "block": "gfx", - "subblock": "utc_atcl2_cache_2m_bank", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "256", - "gridX": "64", - "internal": "0", - "loop": "4", - "addrType": "atcMem", - "status": "Failed" - }, - { - "name": "ras_gfx.110.8", - "block": "gfx", - "subblock": "utc_atcl2_cache_2m_bank", - "type": "parity", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "256", - "gridX": "64", - "internal": "0", - "loop": "4", - "addrType": "atcMem", - "status": "Failed" - }, - { - "name": "ras_gfx.111.2", - "block": "gfx", - "subblock": "utc_atcl2_cache_4k_bank", - "type": "single_correctable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "256", - "gridX": "64", - "internal": "0", - "loop": "4", - "addrType": "atcMem", - "status": "Failed" - }, - { - "name": "ras_gfx.111.4", - "block": "gfx", - "subblock": "utc_atcl2_cache_4k_bank", - "type": "multi_uncorrectable", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "256", - "gridX": "64", - "internal": "0", - "loop": "4", - "addrType": "atcMem", - "status": "Failed" - }, - { - "name": "ras_gfx.111.8", - "block": "gfx", - "subblock": "utc_atcl2_cache_4k_bank", - "type": "poison", - "dispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin", - "backend": "kfd", - "queue": "compute", - "workGroupSizeX": "256", - "gridX": "64", - "internal": "0", - "loop": "4", - "addrType": "atcMem", - "status": "Failed" - }, - { - "name": "ras_mmhub.0.2", - "block": "mmhub", - "type": "single_correctable", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "status": "Error count change" - }, - { - "name": "ras_mmhub.0.4", - "block": "mmhub", - "type": "multi_uncorrectable", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "status": "GPU Hang" - }, - { - "name": "ras_pcie_bif.0.2", - "block": "pcie_bif", - "type": "single_correctable", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "status": "Error count change" - }, - { - "name": "ras_pcie_bif.0.4", - "block": "pcie_bif", - "type": "multi_uncorrectable", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "status": "GPU Hang" - }, - { - "name": "xgmi_wafl.0.2", - "block": "xgmi_wafl", - "subblock": "xgmi", - "type": "single_correctable", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "status": "Error count change" - }, - { - "name": "xgmi_wafl.0.4", - "block": "xgmi_wafl", - "subblock": "xgmi", - "type": "multi_uncorrectable", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "status": "GPU Hang" - }, - { - "name": "xgmi_wafl.1.2", - "block": "xgmi_wafl", - "subblock": "wafl", - "type": "single_correctable", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "status": "Error count change" - }, - { - "name": "xgmi_wafl.1.4", - "block": "xgmi_wafl", - "subblock": "wafl", - "type": "multi_uncorrectable", - "nullDispatchCS": "sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin", - "status": "GPU Hang" - } - ] -} \ No newline at end of file diff --git a/ras_prebuild/librdc_ras.so b/ras_prebuild/librdc_ras.so deleted file mode 100755 index 8778e4de89..0000000000 Binary files a/ras_prebuild/librdc_ras.so and /dev/null differ diff --git a/ras_prebuild/sp3/gfx10/edc/bin/sienna_cichlid/gc_edc_sqc_inst_bank_snop.bin b/ras_prebuild/sp3/gfx10/edc/bin/sienna_cichlid/gc_edc_sqc_inst_bank_snop.bin deleted file mode 100644 index 57ec7404bf..0000000000 Binary files a/ras_prebuild/sp3/gfx10/edc/bin/sienna_cichlid/gc_edc_sqc_inst_bank_snop.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx10/edc/sienna_cichlid/gc_edc_sqc_inst_bank_snop.sp3 b/ras_prebuild/sp3/gfx10/edc/sienna_cichlid/gc_edc_sqc_inst_bank_snop.sp3 deleted file mode 100644 index 08cad4a938..0000000000 --- a/ras_prebuild/sp3/gfx10/edc/sienna_cichlid/gc_edc_sqc_inst_bank_snop.sp3 +++ /dev/null @@ -1,31 +0,0 @@ -shader main -asic(GFX10) -wave_size(32) -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -for var i = 0; i < 1000; i++ - s_nop 0x1 -end - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_clear_vgpr_lds_arcturus.sp3 b/ras_prebuild/sp3/gfx9/edc/arcturus/gc_clear_vgpr_lds_arcturus.sp3 deleted file mode 100644 index ad50ccc374..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_clear_vgpr_lds_arcturus.sp3 +++ /dev/null @@ -1,42 +0,0 @@ -shader main -type(CS) -user_sgpr_count(0) - - // Clear ACC VGPR - for var vgpr = 0; vgpr < 256; ++vgpr - v_accvgpr_write acc[vgpr], 0 - end - - s_movk_i32 m0, 0x0000 - s_mov_b32 s10, 0x000000f8 - s_set_gpr_idx_on s10, 0x8 -label_0004: - v_mov_b32 v0, 0 - v_mov_b32 v1, 0 - v_mov_b32 v2, 0 - v_mov_b32 v3, 0 - v_mov_b32 v4, 0 - v_mov_b32 v5, 0 - v_mov_b32 v6, 0 - v_mov_b32 v7, 0 - s_sub_u32 s10, s10, 8 - s_set_gpr_idx_idx s10 - s_cbranch_scc0 label_0004 - s_set_gpr_idx_off - v_mbcnt_lo_u32_b32 v1, exec_hi, 0 - v_mbcnt_hi_u32_b32 v1, exec_lo, v1 - v_mul_u32_u24 v1, 8, v1 - s_getreg_b32 s11, hwreg(HW_REG_HW_ID, 4, 2) - s_mulk_i32 s11, 0x4000 - v_add_co_u32 v1, vcc, v1, s11 - s_mov_b32 s10, 7 - s_mov_b32 m0, -1 -label_001B: - ds_write2_b64 v1, v[2:3], v[2:3] offset1:64 - ds_write2_b64 v1, v[4:5], v[4:5] offset0:128 offset1:192 - v_add_co_u32 v1, vcc, 0x00000800, v1 - s_sub_u32 s10, s10, 1 - s_cbranch_scc0 label_001B - -s_endpgm -end diff --git a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_compute.sp3 b/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_compute.sp3 deleted file mode 100644 index 6b9aa3b887..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_compute.sp3 +++ /dev/null @@ -1,113 +0,0 @@ -shader main -type(CS) -user_sgpr_count(8) // 2 for the buffer resource + 5 for thread/thread group parameters - //s[0:1] the mmeory address for the buffer resource - //s2 x - //s3 x*y - //s4 x*y*z - //s5 X - //s6 X*Y - //s7 output offset - //s8 loop - - tgid_x_en(1) //s_tgid_x s9 - tgid_y_en(1) //s_tgid_y s10 - tgid_z_en(1) //s_tgid_z s11 - - //vo for tid_x - //v1 for tid_y - //v2 for tid_z - - // Clear ACC VGPR - for var vgpr = 0; vgpr < 256; ++vgpr - v_accvgpr_write acc[vgpr], 0 - end - - //sp3 loop for lifetime - s_mov_b32 s12, 0 //init loop idx s12 -label_0001: - s_cmp_lt_i32 s12, s8 //scc = (s12 < s8) ? 1 : 0 - s_cbranch_scc0 label_0006 //if(scc == 0) then jump to label_0006; else nop - v_mov_b32 v4,s12 - s_add_i32 s12, s12, 1 //add loop incr - s_branch label_0001 -label_0006: //end of SP3 loop - - //fetch the buffer resource through SQC - s_load_dwordx4 s[24:27], s[0:1], 0x0 - s_waitcnt 0 - - s_load_dwordx4 s[40:43], s[0:1], 0x20 - s_waitcnt 0 - - // v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x - v_mad_u32_u24 v3, v1, s2, v0 - v_mad_u32_u24 v3, v2, s3, v3 - - //s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x - s_mul_i32 s28, s_tgid_y, s5 - s_add_i32 s28, s28, s_tgid_x - s_mul_i32 s29, s6, s_tgid_z - s_add_i32 s28, s29, s28 - - //v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group - v_mov_b32 v9, s28 - v_mad_u32_u24 v9, v9, s4, v3 - - // Clear VGPR and LDS - s_movk_i32 m0, 0x0000 - s_mov_b32 s12, 0x000000f8 - s_set_gpr_idx_on s12, 0x8 -label_0004: - v_mov_b32 v0, 0 - v_mov_b32 v1, 0 - v_mov_b32 v2, 0 - v_mov_b32 v3, 0 - v_mov_b32 v4, 0 - v_mov_b32 v5, 0 - v_mov_b32 v6, 0 - v_mov_b32 v7, 0 - s_sub_u32 s12, s12, 8 - s_set_gpr_idx_idx s12 - s_cbranch_scc0 label_0004 - s_set_gpr_idx_off - v_mbcnt_lo_u32_b32 v1, exec_hi, 0 - v_mbcnt_hi_u32_b32 v1, exec_lo, v1 - v_mul_u32_u24 v1, 8, v1 - s_getreg_b32 s13, hwreg(HW_REG_HW_ID, 4, 2) - s_mulk_i32 s13, 0x4000 - v_add_co_u32 v1, vcc, v1, s13 - s_mov_b32 s12, 7 - s_mov_b32 m0, -1 -label_001B: - ds_write2_b64 v1, v[2:3], v[2:3] offset1:64 - ds_write2_b64 v1, v[4:5], v[4:5] offset0:128 offset1:192 - v_add_co_u32 v1, vcc, 0x00000800, v1 - s_sub_u32 s12, s12, 1 - s_cbranch_scc0 label_001B - - // Save coverage in the memory - s_getreg_b32 s20, hwreg(HW_REG_HW_ID, 0, 32) - // s12 = SIMD - s_lshr_b32 s12,s20,4 - s_and_b32 s12, s12, 0x3 - // s13 = CU - s_lshr_b32 s13,s20,8 - s_and_b32 s13, s13, 0xf - // s14 = SE - s_lshr_b32 s14,s20,13 - s_and_b32 s14, s14, 0x7 - // s15 = SE * 16 * 4 + CU * 4 + SIMD - s_mul_i32 s16, s14, 64 - s_mul_i32 s17, s13, 4 - s_add_i32 s15, s16, s17 - s_add_i32 s15, s15, s12 - s_mul_i32 s16, s15, 4 - - s_buffer_store_dword s15, s24, s16 glc - s_waitcnt 0 - - s_buffer_load_dword s17, s24, s16 glc - s_waitcnt 0 -s_endpgm -end diff --git a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_cp_sq.sp3 b/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_cp_sq.sp3 deleted file mode 100644 index e1554afa66..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_cp_sq.sp3 +++ /dev/null @@ -1,59 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(8) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset - -tgid_x_en(1) //s_tgid_x s8 -tgid_y_en(1) //s_tgid_y s9 -tgid_z_en(1) //s_tgid_z s10 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -//read mem data -s_mov_b32 s31, 0x0 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:1 -s_waitcnt 0 - -v_mov_b32 v10, v0 -//buffer_load_dword v10, v9, s24, s31 idxen:1 glc:1 -//s_waitcnt 0 -//v_mov_b32 v11, v1 -s_nop 0x1 -s_nop 0x1 -s_nop 0x1 -s_nop 0x1 - -s_mov_b32 s16, 0xa5a50000 -s_store_dword s16, s[0:1], 0x40 glc - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_cp_sq_gds_read.sp3 b/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_cp_sq_gds_read.sp3 deleted file mode 100644 index a80ea56817..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_cp_sq_gds_read.sp3 +++ /dev/null @@ -1,60 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(8) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset - -tgid_x_en(1) //s_tgid_x s8 -tgid_y_en(1) //s_tgid_y s9 -tgid_z_en(1) //s_tgid_z s10 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - - -//read from the GDS -v_lshlrev_b32 v10, 2, v3 -s_mov_b32 m0, 0xFFFF -s_nop 0x1 -s_nop 0x1 -s_nop 0x1 - -ds_read_b32 v11, v10 gds:1 -s_waitcnt 0 - -v_mov_b32 v12, v11 - -s_nop 0x1 -s_nop 0x1 -s_nop 0x1 -s_nop 0x1 - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_cs_trap_handler.sp3 b/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_cs_trap_handler.sp3 deleted file mode 100644 index 5e4eb34c79..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_cs_trap_handler.sp3 +++ /dev/null @@ -1,673 +0,0 @@ -shader main - -type(CS) - -/*************************************************************************/ -/* control on how to run the shader */ -/*************************************************************************/ -//any hack that needs to be made to run this code in EMU (either becasue various EMU code are not ready or no compute save & restore in EMU run) -var EMU_RUN_HACK = 1 -var EMU_RUN_HACK_RESTORE_NORMAL = 0 -var EMU_RUN_HACK_SAVE_NORMAL_EXIT = 0 -var EMU_RUN_HACK_SAVE_SINGLE_WAVE = 0 -var EMU_RUN_HACK_SAVE_FIRST_TIME = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK -var EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_LO = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK -var EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_HI = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK -var SAVE_LDS = 0 -var WG_BASE_ADDR_LO = 0x9000a000 -var WG_BASE_ADDR_HI = 0x0 -var WAVE_SPACE = 0x5000 //memory size that each wave occupies in workgroup state mem -var CTX_SAVE_CONTROL = 0x0 -var CTX_RESTORE_CONTROL = CTX_SAVE_CONTROL -var SIM_RUN_HACK = 0 //any hack that needs to be made to run this code in SIM (either becasue various RTL code are not ready or no compute save & restore in RTL run) -var SGPR_SAVE_USE_SQC = 0 //use SQC D$ to do the write -var USE_MTBUF_INSTEAD_OF_MUBUF = 0 //need to change BUF_DATA_FORMAT in S_SAVE_BUF_RSRC_WORD3_MISC from 0 to BUF_DATA_FORMAT_32 if set to 1 (i.e. 0x00827FAC) -var SWIZZLE_EN = 0 //whether we use swizzled buffer addressing - -/**************************************************************************/ -/* variables */ -/**************************************************************************/ -var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23 -var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000 - -var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 -var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9 -var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8 -var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 6 -var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT = 24 -var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE = 3 //FIXME sq.blk still has 4 bits at this time while SQ programming guide has 3 bits - -var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400 -var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT = 10 -var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK = 0x100 -var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT = 8 -var SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK = 0x3FF -var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT = 0x0 -var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE = 10 -var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK = 0xFFFFF800 -var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT = 11 -var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE = 21 - -var SQ_WAVE_IB_STS_RCNT_SHIFT = 16 //FIXME -var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT = 15 //FIXME -var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG = 0x00007FFF //FIXME - -var SQ_BUF_RSRC_WORD1_ATC_SHIFT = 24 -var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT = 27 - - -/* Save */ -var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000 //stride is 4 bytes -var S_SAVE_BUF_RSRC_WORD3_MISC = 0x00807FAC //SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14] when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE - -var S_SAVE_SPI_INIT_ATC_MASK = 0x08000000 //bit[27]: ATC bit -var S_SAVE_SPI_INIT_ATC_SHIFT = 27 -var S_SAVE_SPI_INIT_MTYPE_MASK = 0x70000000 //bit[30:28]: Mtype -var S_SAVE_SPI_INIT_MTYPE_SHIFT = 28 -var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG -var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26 - -var S_SAVE_PC_HI_RCNT_SHIFT = 28 //FIXME check with Brian to ensure all fields other than PC[47:0] can be used -var S_SAVE_PC_HI_RCNT_MASK = 0xF0000000 //FIXME -var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 27 //FIXME -var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x08000000 //FIXME - -var s_save_spi_init_lo = exec_lo -var s_save_spi_init_hi = exec_hi - - //tba_lo and tba_hi need to be saved/restored -var tba_lo = ttmp12 -var tba_hi = ttmp13 -var tma_lo = ttmp14 -var tma_hi = ttmp15 - -var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3¡¯h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]} -var s_save_pc_hi = ttmp1 -var s_save_exec_lo = ttmp2 -var s_save_exec_hi = ttmp3 -var s_save_status = ttmp4 -var s_save_trapsts = ttmp5 //not really used until the end of the SAVE routine -var s_save_xnack_mask_lo = ttmp6 -var s_save_xnack_mask_hi = ttmp7 -var s_save_buf_rsrc0 = ttmp8 -var s_save_buf_rsrc1 = ttmp9 -var s_save_buf_rsrc2 = ttmp10 -var s_save_buf_rsrc3 = ttmp11 - -var s_save_mem_offset = tma_lo -var s_save_alloc_size = s_save_trapsts //conflict -var s_save_tmp = s_save_buf_rsrc2 //shared with s_save_buf_rsrc2 (conflict: should not use mem access with s_save_tmp at the same time) -var s_save_m0 = tma_hi - -/* Restore */ -var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE -var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC - -var S_RESTORE_SPI_INIT_ATC_MASK = 0x08000000 //bit[27]: ATC bit -var S_RESTORE_SPI_INIT_ATC_SHIFT = 27 -var S_RESTORE_SPI_INIT_MTYPE_MASK = 0x70000000 //bit[30:28]: Mtype -var S_RESTORE_SPI_INIT_MTYPE_SHIFT = 28 -var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG -var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT = 26 - -var S_RESTORE_PC_HI_RCNT_SHIFT = S_SAVE_PC_HI_RCNT_SHIFT -var S_RESTORE_PC_HI_RCNT_MASK = S_SAVE_PC_HI_RCNT_MASK -var S_RESTORE_PC_HI_FIRST_REPLAY_SHIFT = S_SAVE_PC_HI_FIRST_REPLAY_SHIFT -var S_RESTORE_PC_HI_FIRST_REPLAY_MASK = S_SAVE_PC_HI_FIRST_REPLAY_MASK - -var s_restore_spi_init_lo = exec_lo -var s_restore_spi_init_hi = exec_hi - -var s_restore_mem_offset = ttmp2 -var s_restore_alloc_size = ttmp3 -var s_restore_tmp = ttmp6 //tba_lo/hi need to be restored -var s_restore_mem_offset_save = s_restore_tmp //no conflict - -var s_restore_m0 = s_restore_alloc_size //no conflict - -var s_restore_mode = ttmp7 - -var s_restore_pc_lo = ttmp0 -var s_restore_pc_hi = ttmp1 -var s_restore_exec_lo = tma_lo //no conflict -var s_restore_exec_hi = tma_hi //no conflict -var s_restore_status = ttmp4 -var s_restore_trapsts = ttmp5 -var s_restore_xnack_mask_lo = xnack_mask_lo -var s_restore_xnack_mask_hi = xnack_mask_hi -var s_restore_buf_rsrc0 = ttmp8 -var s_restore_buf_rsrc1 = ttmp9 -var s_restore_buf_rsrc2 = ttmp10 -var s_restore_buf_rsrc3 = ttmp11 - -/**************************************************************************/ -/* trap handler entry points */ -/**************************************************************************/ - if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) //hack to use trap_id for determining save/restore - //FIXME VCCZ un-init assertion s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC - s_and_b32 s_save_tmp, s_save_pc_hi, 0xffff0000 //change SCC - s_cmp_eq_u32 s_save_tmp, 0x007e0000 //Save: trap_id = 0x7e. Restore: trap_id = 0x7f. - s_cbranch_scc0 L_JUMP_TO_RESTORE //do not need to recover STATUS here since we are going to RESTORE - //FIXME s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //need to recover STATUS since we are going to SAVE - s_branch L_SKIP_RESTORE //NOT restore, SAVE actually - else - s_branch L_SKIP_RESTORE //NOT restore. might be a regular trap or save - end - -L_JUMP_TO_RESTORE: - s_branch L_RESTORE //restore - -L_SKIP_RESTORE: - - s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC - s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) - s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK //check whether this is for save - s_cbranch_scc1 L_SAVE //this is the operation for save - //the poential code (such as restore STATUS) on this path is for regular trap handling and don't care for compute save & restore - - //EMU will not execute the code since in hack mode it is skipped while in normal mode there is no save in EMU - //SIM will only execute the code in normal S/R mode but not in hack mode - if (!EMU_RUN_HACK) - L_ERROR: //to catch incorrect savectx setting in SIM assuming the trap handler is only used for save & restore - s_branch L_ERROR - end - -/**************************************************************************/ -/* save routine */ -/**************************************************************************/ - -L_SAVE: - - //check whether there is mem_viol - s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) - s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK - s_cbranch_scc0 L_NO_PC_REWIND - - //if so, need rewind PC assuming GDS operation gets NACKed - s_mov_b32 s_save_tmp, 0 //clear mem_viol bit - s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT, 1), s_save_tmp //clear mem_viol bit - s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32] - s_sub_u32 s_save_pc_lo, s_save_pc_lo, 8 //pc[31:0]-8 - s_subb_u32 s_save_pc_hi, s_save_pc_hi, 0x0 // -scc - -L_NO_PC_REWIND: - s_mov_b32 s_save_tmp, 0 //clear saveCtx bit - s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit - - s_mov_b32 s_save_xnack_mask_lo, xnack_mask_lo //save XNACK_MASK - s_mov_b32 s_save_xnack_mask_hi, xnack_mask_hi - s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_RCNT_SHIFT, SQ_WAVE_IB_STS_RCNT_SIZE) //save RCNT - s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_RCNT_SHIFT - s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp - s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT, SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE) //save FIRST_REPLAY - s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT - s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp - s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS) //clear RCNT and FIRST_REPLAY in IB_STS - s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG - - s_setreg_b32 hwreg(HW_REG_IB_STS), s_save_tmp - - /* inform SPI the readiness and wait for SPI's go signal */ - s_mov_b32 s_save_exec_lo, exec_lo //save EXEC and use EXEC for the go signal from SPI - s_mov_b32 s_save_exec_hi, exec_hi - s_mov_b64 exec, 0x0 //clear EXEC to get ready to receive - if (EMU_RUN_HACK) - - else - s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC - end - - L_SLEEP: - s_sleep 0x2 - - if (EMU_RUN_HACK) - - else - s_cbranch_execz L_SLEEP - end - - - /* setup Resource Contants */ - if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_SINGLE_WAVE)) - //calculate wd_addr using absolute thread id - v_readlane_b32 s_save_tmp, v9, 0 - s_lshr_b32 s_save_tmp, s_save_tmp, 6 - s_mul_i32 s_save_tmp, s_save_tmp, WAVE_SPACE - s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO - s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI - s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL - else - end - if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_SINGLE_WAVE)) - s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO - s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI - s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL - else - end - - - s_mov_b32 s_save_buf_rsrc0, s_save_spi_init_lo //base_addr_lo - s_and_b32 s_save_buf_rsrc1, s_save_spi_init_hi, 0x0000FFFF //base_addr_hi - s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE - s_mov_b32 s_save_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) although not neccessarily inited - s_mov_b32 s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC - s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_ATC_MASK - s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) //get ATC bit into position - s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or ATC - s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_MTYPE_MASK - s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position - s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or MTYPE - - //FIXME right now s_save_m0/s_save_mem_offset use tma_lo/tma_hi (might need to save them before using them?) - s_mov_b32 s_save_m0, m0 //save M0 - - /* global mem offset */ - s_mov_b32 s_save_mem_offset, 0x0 //mem offset initial value = 0 - - - /* the first wave in the threadgroup */ - s_barrier //FIXME not performance-optimal "LDS is used? wait for other waves in the same TG" - s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK //exec is still used here - s_cbranch_scc0 L_SAVE_VGPR - - /* save LDS */ - ////////////////////////////// - L_SAVE_LDS: - - s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on - s_mov_b32 exec_hi, 0xFFFFFFFF - - s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) //lds_size - s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //lds_size is zero? - s_cbranch_scc0 L_SAVE_VGPR //no lds used? jump to L_SAVE_VGPR - s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 6 //LDS size in dwords = lds_size * 64dw - s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //LDS size in bytes - s_mov_b32 s_save_buf_rsrc2, s_save_alloc_size //NUM_RECORDS in bytes - if (SWIZZLE_EN) - s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? - else - s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end - s_mov_b32 m0, 0x0 //lds_offset initial value = 0 - - L_SAVE_LDS_LOOP: - if (SAVE_LDS) - buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 - end - s_add_u32 m0, m0, 256 //every buffer_store_lds does 256 bytes - s_add_u32 s_save_mem_offset, s_save_mem_offset, 256 //mem offset increased by 256 bytes - s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0 - s_cbranch_scc1 L_SAVE_LDS_LOOP //LDS save is complete? - - - /* save VGPRs */ - ////////////////////////////// - L_SAVE_VGPR: - - s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on - s_mov_b32 exec_hi, 0xFFFFFFFF - - s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //vpgr_size - s_add_u32 s_save_alloc_size, s_save_alloc_size, 1 - s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) //FIXME for GFX, zero is possible - s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4) - if (SWIZZLE_EN) - s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? - else - s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end - - s_mov_b32 m0, 0x0 //VGPR initial index value =0 - s_set_gpr_idx_on m0, 0x1 //M0[7:0] = M0[7:0] and M0[15:12] = 0x1 - s_add_u32 s_save_alloc_size, s_save_alloc_size, 0x1000 //add 0x1000 since we compare m0 against it later - - L_SAVE_VGPR_LOOP: - v_mov_b32 v0, v0 //v0 = v[0+m0] - - if(USE_MTBUF_INSTEAD_OF_MUBUF) - tbuffer_store_format_x v0, v0, s_save_buf_rsrc0, s_save_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 - else - buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 - end - - s_add_u32 m0, m0, 1 //next vgpr index - s_add_u32 s_save_mem_offset, s_save_mem_offset, 256 //every buffer_store_dword does 256 bytes - s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 - s_cbranch_scc1 L_SAVE_VGPR_LOOP //VGPR save is complete? - s_set_gpr_idx_off - - /* save SGPRs */ - ////////////////////////////// - s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE) //spgr_size - s_add_u32 s_save_alloc_size, s_save_alloc_size, 1 - s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 4 //Number of SGPRs = (sgpr_size + 1) * 16 (non-zero value) - - if (SGPR_SAVE_USE_SQC) - s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 2 //NUM_RECORDS in bytes - else - s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 8 //NUM_RECORDS in bytes (64 threads) - end - - if (SWIZZLE_EN) - s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? - else - s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end - - s_mov_b32 m0, 0x0 //SGPR initial index value =0 - s_nop 0x0 //Manually inserted wait states - - L_SAVE_SGPR_LOOP: - s_movrels_b32 s0, s0 //s0 = s[0+m0] - write_sgpr_to_mem(s0, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //PV: the best performance should be using s_buffer_store_dwordx4 - s_add_u32 m0, m0, 1 //next sgpr index - s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 - s_cbranch_scc1 L_SAVE_SGPR_LOOP //SGPR save is complete? - - /* save HW registers */ - ////////////////////////////// - L_SAVE_HWREG: - s_mov_b32 s_save_buf_rsrc2, 0x4 //NUM_RECORDS in bytes - if (SWIZZLE_EN) - s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? - else - s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end - - - write_sgpr_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //M0 - - if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_FIRST_TIME)) - s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4 - s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over - s_mov_b32 tba_lo, EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_LO - s_mov_b32 tba_hi, EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_HI - end - - write_sgpr_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //PC - write_sgpr_to_mem(s_save_pc_hi, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) - write_sgpr_to_mem(s_save_exec_lo, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //EXEC - write_sgpr_to_mem(s_save_exec_hi, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) - write_sgpr_to_mem(s_save_status, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //STATUS - - //s_save_trapsts conflicts with s_save_alloc_size - s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) - write_sgpr_to_mem(s_save_trapsts, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //TRAPSTS - - write_sgpr_to_mem(s_save_xnack_mask_lo, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //XNACK_MASK_LO - write_sgpr_to_mem(s_save_xnack_mask_hi, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //XNACK_MASK_HI - - //use s_save_tmp would introduce conflict here between s_save_tmp and s_save_buf_rsrc2 - s_getreg_b32 s_save_m0, hwreg(HW_REG_MODE) //MODE - write_sgpr_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) - - write_sgpr_to_mem(tba_lo, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //TBA_LO - write_sgpr_to_mem(tba_hi, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //TBA_HI - - /* S_PGM_END_SAVED */ //FIXME graphics ONLY - if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_NORMAL_EXIT)) - s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32] - s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4 - s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over - s_rfe_b64 s_save_pc_lo //Return to the main shader program - else - end - - - s_branch L_END_PGM - - - -/**************************************************************************/ -/* restore routine */ -/**************************************************************************/ - -L_RESTORE: - /* Setup Resource Contants */ - if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) - //calculate wd_addr using absolute thread id - v_readlane_b32 s_restore_tmp, v9, 0 - s_lshr_b32 s_restore_tmp, s_restore_tmp, 6 - s_mul_i32 s_restore_tmp, s_restore_tmp, WAVE_SPACE - s_add_i32 s_restore_spi_init_lo, s_restore_tmp, WG_BASE_ADDR_LO - s_mov_b32 s_restore_spi_init_hi, WG_BASE_ADDR_HI - s_and_b32 s_restore_spi_init_hi, s_restore_spi_init_hi, CTX_RESTORE_CONTROL - else - end - - s_mov_b32 s_restore_buf_rsrc0, s_restore_spi_init_lo //base_addr_lo - s_and_b32 s_restore_buf_rsrc1, s_restore_spi_init_hi, 0x0000FFFF //base_addr_hi - s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE - s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) - s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC - s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_ATC_MASK - s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) //get ATC bit into position - s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or ATC - s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_MTYPE_MASK - s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position - s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or MTYPE - - /* global mem offset */ - s_mov_b32 s_restore_mem_offset, 0x0 //mem offset initial value = 0 - - /* the first wave in the threadgroup */ - s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK - s_cbranch_scc0 L_RESTORE_VGPR - - /* restore LDS */ - ////////////////////////////// - L_RESTORE_LDS: - - s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on //be consistent with SAVE although can be moved ahead - s_mov_b32 exec_hi, 0xFFFFFFFF - - s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) //lds_size - s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //lds_size is zero? - s_cbranch_scc0 L_RESTORE_VGPR //no lds used? jump to L_RESTORE_VGPR - s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 6 //LDS size in dwords = lds_size * 64dw - s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //LDS size in bytes - s_mov_b32 s_restore_buf_rsrc2, s_restore_alloc_size //NUM_RECORDS in bytes - if (SWIZZLE_EN) - s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? - else - s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end - s_mov_b32 m0, 0x0 //lds_offset initial value = 0 - - L_RESTORE_LDS_LOOP: - if (SAVE_LDS) - buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 - end - s_add_u32 m0, m0, 256 //every buffer_load_dword does 256 bytes - s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256 //mem offset increased by 256 bytes - s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0 - s_cbranch_scc1 L_RESTORE_LDS_LOOP //LDS restore is complete? - - - /* restore VGPRs */ - ////////////////////////////// - L_RESTORE_VGPR: - - s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on //be consistent with SAVE although can be moved ahead - s_mov_b32 exec_hi, 0xFFFFFFFF - - s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //vpgr_size - s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1 - s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) - s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4) - if (SWIZZLE_EN) - s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? - else - s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end - s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v1, v0 will be the last - s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256 - s_mov_b32 m0, 1 //VGPR initial index value = 1 - s_set_gpr_idx_on m0, 0x8 //M0[7:0] = M0[7:0] and M0[15:12] = 0x8 - s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 0x8000 //add 0x8000 since we compare m0 against it later - - L_RESTORE_VGPR_LOOP: - if(USE_MTBUF_INSTEAD_OF_MUBUF) - tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 - else - buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 - end - s_waitcnt vmcnt(0) //ensure data ready - v_mov_b32 v0, v0 //v[0+m0] = v0 - s_add_u32 m0, m0, 1 //next vgpr index - s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256 //every buffer_load_dword does 256 bytes - s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0 - s_cbranch_scc1 L_RESTORE_VGPR_LOOP //VGPR restore (except v0) is complete? - s_set_gpr_idx_off - /* VGPR restore on v0 */ - if(USE_MTBUF_INSTEAD_OF_MUBUF) - tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 - else - buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 - end - - - /* restore SGPRs */ - ////////////////////////////// - s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE) //spgr_size - s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1 - s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 4 //Number of SGPRs = (sgpr_size + 1) * 16 (non-zero value) - - if (SGPR_SAVE_USE_SQC) - s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 2 //NUM_RECORDS in bytes - else - s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 8 //NUM_RECORDS in bytes (64 threads) - end - if (SWIZZLE_EN) - s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? - else - s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end - read_sgpr_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //save s0 to s_restore_tmp - s_mov_b32 m0, 0x1 //SGPR initial index value =1 //go on with with s1 - - L_RESTORE_SGPR_LOOP: - read_sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //PV: further performance improvement can be made - s_waitcnt lgkmcnt(0) //ensure data ready - s_movreld_b32 s0, s0 //s[0+m0] = s0 - s_add_u32 m0, m0, 1 //next sgpr index - s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0 - s_cbranch_scc1 L_RESTORE_SGPR_LOOP //SGPR restore (except s0) is complete? - s_mov_b32 s0, s_restore_tmp /* SGPR restore on s0 */ - - /* restore HW registers */ - ////////////////////////////// - L_RESTORE_HWREG: - s_mov_b32 s_restore_buf_rsrc2, 0x4 //NUM_RECORDS in bytes - if (SWIZZLE_EN) - s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? - else - s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end - - read_sgpr_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //M0 - read_sgpr_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //PC - read_sgpr_from_mem(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) - read_sgpr_from_mem(s_restore_exec_lo, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //EXEC - read_sgpr_from_mem(s_restore_exec_hi, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) - read_sgpr_from_mem(s_restore_status, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //STATUS - read_sgpr_from_mem(s_restore_trapsts, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //TRAPSTS - read_sgpr_from_mem(xnack_mask_lo, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //XNACK_MASK_LO - read_sgpr_from_mem(xnack_mask_hi, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //XNACK_MASK_HI - read_sgpr_from_mem(s_restore_mode, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //MODE - read_sgpr_from_mem(tba_lo, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //TBA_LO - read_sgpr_from_mem(tba_hi, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //TBA_HI - - s_waitcnt lgkmcnt(0) //from now on, it is safe to restore STATUS and IB_STS - - s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS - - //for normal save & restore, the saved PC points to the next inst to execute, no adjustment needs to be made, otherwise: - if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) - s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 8 //pc[31:0]+8 //two back-to-back s_trap are used (first for save and second for restore) - s_addc_u32 s_restore_pc_hi, s_restore_pc_hi, 0x0 //carry bit over - end - if ((EMU_RUN_HACK) && (EMU_RUN_HACK_RESTORE_NORMAL)) - s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 4 //pc[31:0]+4 // save is hack through s_trap but restore is normal - s_addc_u32 s_restore_pc_hi, s_restore_pc_hi, 0x0 //carry bit over - end - - s_mov_b32 m0, s_restore_m0 - s_mov_b32 exec_lo, s_restore_exec_lo - s_mov_b32 exec_hi, s_restore_exec_hi - - s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK, s_restore_trapsts - s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE), s_restore_m0 - s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK, s_restore_trapsts - s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT - s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE), s_restore_m0 - //s_setreg_b32 hwreg(HW_REG_TRAPSTS), s_restore_trapsts //don't overwrite SAVECTX bit as it may be set through external SAVECTX during restore - s_setreg_b32 hwreg(HW_REG_MODE), s_restore_mode - //reuse s_restore_m0 as a temp register - s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_RCNT_MASK - s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_RCNT_SHIFT - s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_RCNT_SHIFT - s_mov_b32 s_restore_tmp, 0x0 //IB_STS is zero - s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0 - s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_FIRST_REPLAY_MASK - s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT - s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT - s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0 - s_and_b32 s_restore_m0, s_restore_status, SQ_WAVE_STATUS_INST_ATC_MASK - s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT - s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_tmp - s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status - - s_barrier //barrier to ensure the readiness of LDS before access attemps from any other wave in the same TG //FIXME not performance-optimal at this time - - -// s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution - s_rfe_restore_b64 s_restore_pc_lo, s_restore_m0 // s_restore_m0[0] is used to set STATUS.inst_atc - - -/**************************************************************************/ -/* the END */ -/**************************************************************************/ -L_END_PGM: - s_endpgm - -end - - -/**************************************************************************/ -/* the helper functions */ -/**************************************************************************/ - -function write_sgpr_to_mem(s, s_rsrc, s_mem_offset, use_sqc, use_mtbuf) - if (use_sqc) - s_mov_b32 exec_lo, m0 //assuming exec_lo is not needed anymore from this point on - s_mov_b32 m0, s_mem_offset - s_buffer_store_dword s, s_rsrc, m0 glc:1 - s_add_u32 s_mem_offset, s_mem_offset, 4 - s_mov_b32 m0, exec_lo - elsif (use_mtbuf) - v_mov_b32 v0, s - tbuffer_store_format_x v0, v0, s_rsrc, s_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 - s_add_u32 s_mem_offset, s_mem_offset, 256 - else - v_mov_b32 v0, s - buffer_store_dword v0, v0, s_rsrc, s_mem_offset slc:1 glc:1 - s_add_u32 s_mem_offset, s_mem_offset, 256 - end -end - - - -function read_sgpr_from_mem(s, s_rsrc, s_mem_offset, use_sqc) - s_buffer_load_dword s, s_rsrc, s_mem_offset glc:1 - if (use_sqc) - s_add_u32 s_mem_offset, s_mem_offset, 4 - else - s_add_u32 s_mem_offset, s_mem_offset, 256 - end -end - - - - - diff --git a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_dc_restore.sp3 b/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_dc_restore.sp3 deleted file mode 100644 index 21130cd511..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_dc_restore.sp3 +++ /dev/null @@ -1,21 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(4) - v_mov_b32 v0, s0 - v_mov_b32 v1, s1 - v_mov_b32 v2, s2 - v_mov_b32 v3, s3 - flat_load_dword v4, v[0:1] slc - s_waitcnt vmcnt(0)&lgkmcnt(0) - v_mov_b32 v5, 0 - s_sleep 40000 -LOOP: - v_add_co_u32 v5, vcc, 1, v5 - s_waitcnt vmcnt(0)&lgkmcnt(0) - v_cmp_lt_u32 vcc, v5, v4 - s_cbranch_vccnz LOOP - flat_store_dword v[2,3], v5 - s_waitcnt vmcnt(0)&lgkmcnt(0) -s_endpgm -end diff --git a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_ea.sp3 b/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_ea.sp3 deleted file mode 100644 index 99147cd517..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_ea.sp3 +++ /dev/null @@ -1,69 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -//store and load s8 times -s_mov_b32 s30, s8 -s_mov_b32 s31, 0x80 - -STORE_LOOP: -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x100 -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x100 -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 STORE_LOOP - -s_mov_b32 s8, s30 -s_mov_b32 s31, 0x80 - -s_waitcnt 0 -LOAD_LOOP: -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x100 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x100 -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 LOAD_LOOP - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_ea_02.sp3 b/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_ea_02.sp3 deleted file mode 100644 index 0b8e13bdb2..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_ea_02.sp3 +++ /dev/null @@ -1,131 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -//store and load s8 times -s_mov_b32 s30, s8 -s_mov_b32 s31, 0x0 - -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 - -s_waitcnt 0 - -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_ea_dirty.sp3 b/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_ea_dirty.sp3 deleted file mode 100644 index a3ba2787de..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_ea_dirty.sp3 +++ /dev/null @@ -1,61 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -s_mov_b32 s32, 0x4000 -v_mul_i32_i24 v9, v9, s32 - -//store and load s8 times -s_mov_b32 s30, s8 -s_mov_b32 s31, 0x10000 - -STORE_LOOP: -buffer_store_dword v0, v9, s24, s31 offen:1 -s_waitcnt 0 -s_add_u32 s31, s31, 0x10000 -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 STORE_LOOP - -s_mov_b32 s16, 0xa5a50000 -s_store_dword s16, s[0:1], 0x40 glc - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_gds_02.sp3 b/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_gds_02.sp3 deleted file mode 100644 index 55f5ceec97..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_gds_02.sp3 +++ /dev/null @@ -1,79 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -//read mem data -s_mov_b32 s31, 0x0 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:1 -s_waitcnt 0 - -//write it to GDS -s_mov_b32 s30, s8 -v_lshlrev_b32 v10, 2, v9 -s_mov_b32 m0, 0xFFFF -s_nop 0x1 -s_nop 0x1 -s_nop 0x1 - -STORE_LOOP: -ds_write_b32 v10, v0 gds:1 // GPU hang when GPU access the GDS with GFX queue -s_waitcnt 0 -v_add_u16 v10, v10, 0x10 -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 STORE_LOOP - -s_mov_b32 s8, s30 -v_lshlrev_b32 v10, 2, v3 - -LOAD_LOOP: -ds_read_b32 v11, v10 gds:1 -s_waitcnt 0 -v_mov_b32 v12, v11 -v_add_u16 v10, v10, 0x10 -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 LOAD_LOOP - -s_mov_b32 s16, 0xa5a50000 -s_store_dword s16, s[0:1], 0x40 glc - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_gds_address1.sp3 b/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_gds_address1.sp3 deleted file mode 100644 index 1df833258a..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_gds_address1.sp3 +++ /dev/null @@ -1,55 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(8) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset - -tgid_x_en(1) //s_tgid_x s8 -tgid_y_en(1) //s_tgid_y s9 -tgid_z_en(1) //s_tgid_z s10 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -//read data from GDS -v_lshlrev_b32 v10, 2, v3 -s_mov_b32 m0, 0xFFFF -s_nop 1 -s_nop 1 -s_nop 1 -ds_read_b32 v11, v10 gds:1 -s_waitcnt 0 - -//write the data to memory -buffer_store_dword v11, v9, s24, s7 idxen:1 glc:1 -s_waitcnt 0 - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_gds_oa_phy.sp3 b/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_gds_oa_phy.sp3 deleted file mode 100644 index 93509f41cf..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_gds_oa_phy.sp3 +++ /dev/null @@ -1,68 +0,0 @@ -shader main - type(CS) - - - user_sgpr_count(4) - tgid_x_en(1) - tgid_y_en(1) - tgid_z_en(1) - - s_getreg_b32 s18, hwreg(HW_REG_HW_ID, 0, 32) - s_bfe_u32 s16, s18, 0x2001e // get meid - s_bfe_u32 s17, s18, 0x20006 // get pipeid - //s_add_u32 s17, s17, s16 - - // get ring id - v_mov_b32 v20, s17 - s_and_b32 s17, s17, 0x7 - - // Get thread_id inside wave - v_mbcnt_lo_u32_b32 v8, 0xffffffff, 0 - v_mbcnt_hi_u32_b32 v9, 0xffffffff, v8 - - s_waitcnt 0 - - // init: gds write address - v_mov_b32 v13, 0 - - // the first 128DW is for ordered-append counter - v_mov_b32 v14, 0x80 - - // offset ring - v_mov_b32 v15, 0x200 - - v_mul_lo_u32 v15, v15, v20 // ring offset - v_mov_b32 v16, 0x40 // wave_size - - v_mul_lo_u32 v18, v1, s1 - v_add_co_u32 v18, vcc, v18, v0 - v_lshrrev_b32 v17,6 ,v18 - s_mov_b32 s9, s12 - s_lshr_b32 s9, s9, 6 - s_and_b32 s9, s9, 0x7ff - s_lshl_b32 s17, s17, 18 - s_or_b32 s9, s9, s17 - s_mov_b32 m0, s9 - - v_mov_b32 v10, 1 - v_mov_b32 v11, 0 - ds_ordered_count v11, v10 gds:1 offset0:0 offset1:1 - s_waitcnt 0 - - v_mov_b32 v18, v11 - - v_mul_lo_u32 v16, v16, v18 // waves offset before. - v_add_co_u32 v13, vcc, v13, v14 - v_add_co_u32 v13, vcc, v13, v15 - v_add_co_u32 v13, vcc, v13, v16 - v_add_co_u32 v13, vcc, v13, v9 - - v_lshlrev_b32 v13,2,v13 - s_mov_b32 m0, 0x4000 - s_nop 0 - ds_write_b32 v13, v0 gds:1 - s_waitcnt 0 - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_lds_02.sp3 b/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_lds_02.sp3 deleted file mode 100644 index 780e9f9af7..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_lds_02.sp3 +++ /dev/null @@ -1,79 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -//read mem data -s_mov_b32 s31, 0x0 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:1 -s_waitcnt 0 - -//store and load s8 times -s_mov_b32 s30, s8 -v_lshlrev_b32 v10, 2, v3 -s_mov_b32 m0, 0xFFFF -s_nop 0x1 -s_nop 0x1 -s_nop 0x1 - -STORE_LOOP: -ds_write_b32 v10, v0 -s_waitcnt 0 -v_add_u16 v10, v10, 0x10 -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 STORE_LOOP - -s_mov_b32 s8, s30 -v_lshlrev_b32 v10, 2, v3 - -LOAD_LOOP: -ds_read_b32 v11, v10 -s_waitcnt 0 -v_mov_b32 v12, v11 -v_add_u16 v10, v10, 0x10 -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 LOAD_LOOP - -s_mov_b32 s16, 0xa5a50000 -s_store_dword s16, s[0:1], 0x40 glc - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_lds_address1.sp3 b/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_lds_address1.sp3 deleted file mode 100644 index 09618ba90f..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_lds_address1.sp3 +++ /dev/null @@ -1,55 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(8) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset - -tgid_x_en(1) //s_tgid_x s8 -tgid_y_en(1) //s_tgid_y s9 -tgid_z_en(1) //s_tgid_z s10 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -//read it from LDS -v_lshlrev_b32 v10, 2, v3 -s_mov_b32 m0, 0xFFFF -s_nop 1 -s_nop 1 -s_nop 1 -ds_read_b32 v0, v10 -s_waitcnt 0 - -//write the data to memory -buffer_store_dword v0, v9, s24, s7 idxen:1 glc:1 -s_waitcnt 0 - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_mc_read.sp3 b/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_mc_read.sp3 deleted file mode 100644 index 4c9f7ec36a..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_mc_read.sp3 +++ /dev/null @@ -1,52 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(8) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset - -tgid_x_en(1) //s_tgid_x s8 -tgid_y_en(1) //s_tgid_y s9 -tgid_z_en(1) //s_tgid_z s10 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -//read mem data -s_mov_b32 s31, 0x0 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:1 -s_waitcnt 0 - - -//export poisoned data to L2 -buffer_store_dword v0, v9, s24, s7 idxen:1 glc:1 -s_waitcnt 0 - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_mc_read_tcc_halt.sp3 b/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_mc_read_tcc_halt.sp3 deleted file mode 100644 index e13d811bd4..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_mc_read_tcc_halt.sp3 +++ /dev/null @@ -1,77 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(8) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset - -tgid_x_en(1) //s_tgid_x s8 -tgid_y_en(1) //s_tgid_y s9 -tgid_z_en(1) //s_tgid_z s10 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -//read mem data -s_mov_b32 s31, 0x0 -//For vega20, we need to set bit 12 low. This bit will just be set low here in the shader. -//s_mov_b32 s24, 0x15c000 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:1 -s_waitcnt 0 - -//store it 10 times -v_mov_b32 v10, v0 -v_mov_b32 v11, v0 -v_mov_b32 v12, v0 -v_mov_b32 v13, v0 -v_mov_b32 v14, v0 -v_mov_b32 v15, v0 -v_mov_b32 v16, v0 -v_mov_b32 v17, v0 -v_mov_b32 v18, v0 -v_mov_b32 v19, v0 - -// read them back -v_mov_b32 v29, v10 -v_mov_b32 v28, v11 -v_mov_b32 v27, v12 -v_mov_b32 v26, v13 -v_mov_b32 v25, v14 -v_mov_b32 v24, v15 -v_mov_b32 v23, v16 -v_mov_b32 v22, v17 -v_mov_b32 v21, v18 -v_mov_b32 v20, v19 - -//export poisoned data to L2 -buffer_store_dword v0, v9, s24, s7 idxen:1 glc:1 -s_waitcnt 0 - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_no_pollute.sp3 b/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_no_pollute.sp3 deleted file mode 100644 index e1c141b7bd..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_no_pollute.sp3 +++ /dev/null @@ -1,51 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(8) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset - -tgid_x_en(1) //s_tgid_x s8 -tgid_y_en(1) //s_tgid_y s9 -tgid_z_en(1) //s_tgid_z s10 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -//read mem data -s_mov_b32 s31, 0x0 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:1 -s_waitcnt 0 - -//export poisoned data to L2 -buffer_store_dword v0, v9, s24, s7 idxen:1 glc:1 -s_waitcnt 0 - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_sgpr_02.sp3 b/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_sgpr_02.sp3 deleted file mode 100644 index 76b575d084..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_sgpr_02.sp3 +++ /dev/null @@ -1,55 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//SPI may touch s0...sn before shader is run - -s_mov_b32 s16, s2 - -//write data -s_mov_b32 s30, s0 -s_mov_b32 s31, s1 -s_mov_b32 s32, s2 -s_mov_b32 s33, s3 -s_mov_b32 s34, s4 -s_mov_b32 s35, s5 -s_mov_b32 s36, s6 -s_mov_b32 s37, s7 -s_mov_b32 s38, s8 -s_mov_b32 s39, s9 - -//read back -s_mov_b32 s0, s30 -s_mov_b32 s1, s31 -s_mov_b32 s2, s32 -s_mov_b32 s3, s33 -s_mov_b32 s4, s34 -s_mov_b32 s5, s35 -s_mov_b32 s6, s36 -s_mov_b32 s7, s37 -s_mov_b32 s8, s38 -s_mov_b32 s9, s39 - -s_store_dword s16, s[0:1], 0x0 glc - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_sh_atcl1.sp3 b/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_sh_atcl1.sp3 deleted file mode 100644 index 4058f5b7dc..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_sh_atcl1.sp3 +++ /dev/null @@ -1,75 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_load_dwordx4 s[20:23], s[0:1], 16 // load atc mem surface rsrc -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - - -var MTYPE_UC = 0x38000000 -s_or_b32 s27, s27, MTYPE_UC - -s_mov_b32 s30, s8 -s_mov_b32 m0, 0x0 - - -STORE_LOOP: -s_buffer_store_dword s8, s[20:23], m0 glc:1 -s_waitcnt 0 -s_add_u32 m0, m0, 4*1024 // step one 4KB page table address -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 STORE_LOOP - -var DEBUG_FUNCTION = 0 -// Remove function check code to half shader run time... -if DEBUG_FUNCTION -s_mov_b32 s8, s30 -s_mov_b32 m0, 0x0 - -LOAD_LOOP: -s_buffer_load_dword s0, s[20:23], m0 glc:1 -s_waitcnt 0 -s_add_u32 m0, m0, 4*1024 -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 LOAD_LOOP -end - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_sqc_data_bank.sp3 b/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_sqc_data_bank.sp3 deleted file mode 100644 index fe1b4c2a60..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_sqc_data_bank.sp3 +++ /dev/null @@ -1,96 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -/* -s_bfe_u32 s33, s8, 0x20004 // extract bank select bits -s_lshl_b32 s33, s33, 6 // ((bank_sel & 0x3) << 6) , bank_sel = address[9:8] ^ address[7:6], if 4 bank enabled -s_and_b32 s8, s8, 0xf -*/ - -//store and load s8 times -s_mov_b32 s30, s8 -s_mov_b32 s31, 0x0 - - -s_or_b32 s26, s26, 0x1000 //hack the buffer size to enough - -STORE_LOOP: - -var TOUCH_4_BANKS=1 -if TOUCH_4_BANKS -s_mov_b32 m0, 0x0 // BANKA -s_buffer_store_dword s8, s[24:27], m0 glc:1 -s_waitcnt 0 -s_buffer_load_dword s32, s[24:27], m0 glc:1 -s_waitcnt 0 - - -s_mov_b32 m0, 0x40 // BANKB -s_buffer_store_dword s8, s[24:27], m0 glc:1 -s_waitcnt 0 -s_buffer_load_dword s32, s[24:27], m0 glc:1 -s_waitcnt 0 - -/* -s_mov_b32 m0, 0x80 // BANKC -s_buffer_store_dword s8, s[24:27], m0 glc:1 -s_waitcnt 0 -s_buffer_load_dword s32, s[24:27], m0 glc:1 -s_waitcnt 0 - - -s_mov_b32 m0, 0xC0 // BANKD -s_buffer_store_dword s8, s[24:27], m0 glc:1 -s_waitcnt 0 -s_buffer_load_dword s32, s[24:27], m0 glc:1 -s_waitcnt 0 -*/ - -end - -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 STORE_LOOP - -s_dcache_wb // to make emu, sim img match... - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_sqc_data_bank_tag.sp3 b/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_sqc_data_bank_tag.sp3 deleted file mode 100644 index f7df170170..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_sqc_data_bank_tag.sp3 +++ /dev/null @@ -1,96 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -/* -s_bfe_u32 s33, s8, 0x20004 // extract bank select bits -s_lshl_b32 s33, s33, 6 // ((bank_sel & 0x3) << 6) , bank_sel = address[9:8] ^ address[7:6], if 4 bank enabled -s_and_b32 s8, s8, 0xf -*/ - -//store and load s8 times -s_mov_b32 s30, s8 -s_mov_b32 s31, 0x0 - - -s_or_b32 s26, s26, 0x1000 //hack the buffer size to enough - -STORE_LOOP: - -var TOUCH_4_BANKS=1 -if TOUCH_4_BANKS -s_mov_b32 m0, 0x0 // BANKA -s_buffer_store_dword s8, s[24:27], m0 glc:0 -s_waitcnt 0 -s_buffer_load_dword s32, s[24:27], m0 glc:0 -s_waitcnt 0 - - -s_mov_b32 m0, 0x40 // BANKB -s_buffer_store_dword s8, s[24:27], m0 glc:0 -s_waitcnt 0 -s_buffer_load_dword s32, s[24:27], m0 glc:0 -s_waitcnt 0 - -/* -s_mov_b32 m0, 0x80 // BANKC -s_buffer_store_dword s8, s[24:27], m0 glc:1 -s_waitcnt 0 -s_buffer_load_dword s32, s[24:27], m0 glc:1 -s_waitcnt 0 - - -s_mov_b32 m0, 0xC0 // BANKD -s_buffer_store_dword s8, s[24:27], m0 glc:1 -s_waitcnt 0 -s_buffer_load_dword s32, s[24:27], m0 glc:1 -s_waitcnt 0 -*/ - -end - -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 STORE_LOOP - -s_dcache_wb // to make emu, sim img match... - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_sqc_dirty_bit.sp3 b/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_sqc_dirty_bit.sp3 deleted file mode 100644 index dd766baed3..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_sqc_dirty_bit.sp3 +++ /dev/null @@ -1,112 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -s_or_b32 s27, s27, 0x8000000 // changing mtype to non volatile -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - - -//store and load s8 times -s_mov_b32 s30, s8 -s_mov_b32 s31, 0x0 - -s_mov_b32 s9, 0xaa -s_mov_b32 s10, 0xbb -s_mov_b32 s11, 0xcc - -// BUFFER STORE OFFSETS FOR BANK A AND BANKB -s_mov_b32 s12, 0x0 -s_mov_b32 s13, 0x10 -s_mov_b32 s14, 0x40 -s_mov_b32 s15, 0x50 - - -// The following sequence is needed to inject error in dirty bit ram. Sequence was provided by SQC designer 4/1/2015 -//1. you have an invalid line in data cache, -//2. you write to some of the dwords in that line (the remaining dwords are still invalid), -//3. then there is a read request that hit on that line, but it needs the dwords that are not yet there in that line -//(in other words, it needs some of the invalid dwords of that line), -//4. the request will go to TC, -//5. when TC return comes back, the dirty bit rm will be read - -STORE_LOOP: - -var TOUCH_4_BANKS=1 -if TOUCH_4_BANKS - -s_mov_b32 m0, s13 // BANKA write one dword to tc -s_buffer_store_dwordx2 s[8:9], s[24:27], m0 glc:1 -s_waitcnt 0 - - -s_mov_b32 m0, s12 // BANKA. write one dword to sqc -s_buffer_store_dwordx2 s[10:11], s[24:27], m0 glc:0 -s_waitcnt 0 - -s_mov_b32 m0, s13 // BANK A read the dword that is not in cache -s_buffer_load_dword s32, s[24:27], m0 glc:0 -s_waitcnt 0 - -s_mov_b32 m0, s15 // BANKB write one dword to tc -s_buffer_store_dwordx2 s[8:9], s[24:27], m0 glc:1 -s_waitcnt 0 - -s_mov_b32 m0, s14 // BANKB write one dword to sqc -s_buffer_store_dwordx2 s[10:11], s[24:27], m0 glc:0 -s_waitcnt 0 - -s_mov_b32 m0, s15 // BANK B read the dword that is not in cache -s_buffer_load_dword s32, s[24:27], m0 glc:0 -s_waitcnt 0 -end - -s_add_u32 s12, s12,0x80 -s_add_u32 s13, s13,0x80 -s_add_u32 s14, s14,0x80 -s_add_u32 s15, s15,0x80 -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 STORE_LOOP - -s_dcache_wb // to make emu, sim img match... - -s_mov_b32 s16, 0xa5a50000 -s_store_dword s16, s[0:1], 0x40 glc - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_sqc_inst_atcl1.sp3 b/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_sqc_inst_atcl1.sp3 deleted file mode 100644 index 3d3186d362..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_sqc_inst_atcl1.sp3 +++ /dev/null @@ -1,63 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_load_dwordx4 s[20:23], s[0:1], 16 // load atc mem surface rsrc -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - - -var MTYPE_UC = 0x38000000 -s_or_b32 s27, s27, MTYPE_UC - -label inst_page[34+1] // 34 4k pages - -for var i =0; i < 34; i++ -inst_page[i]: - //each block are 4k side... - s_cbranch_execnz inst_page[i+1] //1 dword - for var j = 0; j < (4*1024)/4 -1; j++ - v_mov_b32 v0, 0 // each with 1 dword - end - -end -inst_page[34]: - - - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_sqc_inst_bank.sp3 b/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_sqc_inst_bank.sp3 deleted file mode 100644 index 64e1084623..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_sqc_inst_bank.sp3 +++ /dev/null @@ -1,69 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -// don't care about the loop count, fix 8 loops -// Totaly number of cacheline equals 2(A,B,)*8 - -var num_cache_lines = 16 -label BLOCK_64B[num_cache_lines] - - -for var loop = 0; loop < num_cache_lines - 1; loop++ -BLOCK_64B[loop]: - s_branch BLOCK_64B[loop+1] // 1DW - for var i = 0; i < 15; i++ - v_nop - end - -end - -// last block -for var i = 0; i < 15; i++ - v_nop -end -//For uei 2 msb and lsb flipped -// s_nop will become v_nop and it will a legal instruction -BLOCK_64B[num_cache_lines-1]: - for var i = 0; i < 81; i++ - s_nop 0x1 - end -s_endpgm -end - -/** comment, four bank interleave -Addr 0x90000000 => Bank A -Addr 0x90000040 => Bank B -Addr 0x90000080 => Bank C -Addr 0x900000c0 => Bank D -Addr 0x90000100 => Bank B -Addr 0x90000140 => Bank A -Addr 0x90000180 => Bank D -Addr 0x900001c0 => Bank C -Addr 0x90000200 => Bank C -Addr 0x90000240 => Bank D -Addr 0x90000280 => Bank A -Addr 0x900002c0 => Bank B -Addr 0x90000300 => Bank D -Addr 0x90000340 => Bank C -Addr 0x90000380 => Bank B - -**/ diff --git a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_sqc_inst_bank_snop.sp3 b/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_sqc_inst_bank_snop.sp3 deleted file mode 100644 index ba62535613..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_sqc_inst_bank_snop.sp3 +++ /dev/null @@ -1,29 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - - for var i = 0; i < 1000; i++ - s_nop 0x1 - end - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_ta_address1.sp3 b/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_ta_address1.sp3 deleted file mode 100644 index 04e7a3a285..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_ta_address1.sp3 +++ /dev/null @@ -1,51 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(8) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset - -tgid_x_en(1) //s_tgid_x s8 -tgid_y_en(1) //s_tgid_y s9 -tgid_z_en(1) //s_tgid_z s10 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -//read from memory -s_mov_b32 s31, 0x0 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:1 -s_waitcnt 0 - -//write the data to memory -buffer_store_dword v0, v9, s24, s7 idxen:1 glc:1 -s_waitcnt 0 - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_tc_02.sp3 b/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_tc_02.sp3 deleted file mode 100644 index 46b8b9b0a0..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_tc_02.sp3 +++ /dev/null @@ -1,73 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -s_mov_b32 s16, 0xa5a50001 -s_store_dword s16, s[0:1], 0x40 glc - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -//store and load s8 times -s_mov_b32 s30, s8 -s_mov_b32 s31, 0x0 - -STORE_LOOP: -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_waitcnt 0 -s_add_u32 s31, s31, 0x4 -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 STORE_LOOP - -s_mov_b32 s8, s30 -s_mov_b32 s31, 0x0 - -LOAD_LOOP: -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_waitcnt 0 -v_mov_b32 v12, v0 -s_add_u32 s31, s31, 0x4 -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 LOAD_LOOP - -s_mov_b32 s16, 0xa5a50000 -s_store_dword s16, s[0:1], 0x40 glc - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_tc_02_ea.sp3 b/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_tc_02_ea.sp3 deleted file mode 100644 index a374f8454d..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_tc_02_ea.sp3 +++ /dev/null @@ -1,71 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -//For vega20, we need to set bit 12 low to steer traffic to ea0 -s_mov_b32 s32, 0xFFFFEFFF -s_and_b32 s24, s24, s32 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -//store and load s8 times -s_mov_b32 s30, s8 -s_mov_b32 s31, 0x0 - -STORE_LOOP: -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_waitcnt 0 -s_add_u32 s31, s31, 0x4 -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 STORE_LOOP - -s_mov_b32 s8, s30 -s_mov_b32 s31, 0x0 - -LOAD_LOOP: -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_waitcnt 0 -v_mov_b32 v12, v0 -s_add_u32 s31, s31, 0x4 -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 LOAD_LOOP - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_tc_atomic_02.sp3 b/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_tc_atomic_02.sp3 deleted file mode 100644 index 70439d9d5c..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_tc_atomic_02.sp3 +++ /dev/null @@ -1,345 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -//set bit 12 low to select EA0 -s_mov_b32 s32, 0xFFFFEFFF -s_and_b32 s24, s24, s32 - -s_and_b32 s31, s9, 0x1 -s_cmpk_eq_i32 s31, 0x1 -s_cbranch_scc1 ODD_WAVES - -//set bit 12 high to select EA1 -s_mov_b32 s32, 0x1000 -s_or_b32 s24, s24, s32 - -ODD_WAVES: - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -v_mul_i32_i24 v3, v3, 0x10 -v_mul_i32_i24 v9, v9, 0x10 - -s_mov_b32 s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 - -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 - -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 - -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 - -s_barrier - -s_mov_b32 s31, 0xF0000 -s_mov_b32 s32, 0x6000 - -//store and load s8 times -s_mov_b32 s30, s8 - -s_cmpk_lt_i32 s9, 0x2 -s_cbranch_scc1 ATOMIC_LOOP - -LOAD_LOOP: - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -s_sub_u32 s30, s30, 1 -s_cmpk_eq_u32 s30, 0 -s_cbranch_scc0 LOAD_LOOP - - -s_cmpk_ge_i32 s9, 0x2 -s_cbranch_scc1 END - -ATOMIC_LOOP: - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -s_sub_u32 s30, s30, 1 -s_cmpk_eq_u32 s30, 0 -s_cbranch_scc0 ATOMIC_LOOP - -//s_waitcnt 0 - -END: -s_waitcnt 0 - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_tc_atomic_03.sp3 b/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_tc_atomic_03.sp3 deleted file mode 100644 index bd4c14ba21..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_tc_atomic_03.sp3 +++ /dev/null @@ -1,509 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -//set bit 12 low to select EA0 -s_mov_b32 s32, 0xFFFFEFFF -s_and_b32 s24, s24, s32 - -s_and_b32 s31, s9, 0x1 -s_cmpk_eq_i32 s31, 0x1 -s_cbranch_scc1 ODD_WAVES - -//set bit 12 high to select EA1 -s_mov_b32 s32, 0x1000 -s_or_b32 s24, s24, s32 - -ODD_WAVES: - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -v_mul_i32_i24 v3, v3, 0x10 -v_mul_i32_i24 v9, v9, 0x10 - -s_mov_b32 s31, 0x9000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 - -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 - -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 - -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 - -s_barrier - -s_mov_b32 s31, 0xF0000 -s_mov_b32 s32, 0x9000 - -//store and load s8 times -s_mov_b32 s30, s8 - -s_cmpk_lt_i32 s9, 0x2 -s_cbranch_scc1 ATOMIC_LOOP - -s_mov_b32 s20, 0x1 - -LOAD_LOOP: - -s_atomic_add s20, s0, 0x100000 -s_atomic_add s20, s0, 0x100010 -s_atomic_add s20, s0, 0x100020 -s_atomic_add s20, s0, 0x100030 -s_atomic_add s20, s0, 0x100040 -s_atomic_add s20, s0, 0x100050 -s_atomic_add s20, s0, 0x100060 -s_atomic_add s20, s0, 0x100070 -s_atomic_add s20, s0, 0x100080 -s_atomic_add s20, s0, 0x100090 - -s_atomic_add s20, s0, 0x100100 -s_atomic_add s20, s0, 0x100110 -s_atomic_add s20, s0, 0x100120 -s_atomic_add s20, s0, 0x100130 -s_atomic_add s20, s0, 0x100140 -s_atomic_add s20, s0, 0x100150 -s_atomic_add s20, s0, 0x100160 -s_atomic_add s20, s0, 0x100170 -s_atomic_add s20, s0, 0x100180 -s_atomic_add s20, s0, 0x100190 - -s_atomic_add s20, s0, 0x100200 -s_atomic_add s20, s0, 0x100210 -s_atomic_add s20, s0, 0x100220 -s_atomic_add s20, s0, 0x100230 -s_atomic_add s20, s0, 0x100240 -s_atomic_add s20, s0, 0x100250 -s_atomic_add s20, s0, 0x100260 -s_atomic_add s20, s0, 0x100270 -s_atomic_add s20, s0, 0x100280 -s_atomic_add s20, s0, 0x100290 - -s_atomic_add s20, s0, 0x100300 -s_atomic_add s20, s0, 0x100310 -s_atomic_add s20, s0, 0x100320 -s_atomic_add s20, s0, 0x100330 -s_atomic_add s20, s0, 0x100340 -s_atomic_add s20, s0, 0x100350 -s_atomic_add s20, s0, 0x100360 -s_atomic_add s20, s0, 0x100370 -s_atomic_add s20, s0, 0x100380 -s_atomic_add s20, s0, 0x100390 - -s_atomic_add s20, s0, 0x100400 -s_atomic_add s20, s0, 0x100404 -s_atomic_add s20, s0, 0x100408 -s_atomic_add s20, s0, 0x10040c -s_atomic_add s20, s0, 0x100410 -s_atomic_add s20, s0, 0x100414 -s_atomic_add s20, s0, 0x100418 -s_atomic_add s20, s0, 0x10041c -s_atomic_add s20, s0, 0x100420 -s_atomic_add s20, s0, 0x100424 -s_atomic_add s20, s0, 0x100428 -s_atomic_add s20, s0, 0x10042c - -s_atomic_add s20, s0, 0x100500 -s_atomic_add s20, s0, 0x100504 -s_atomic_add s20, s0, 0x100508 -s_atomic_add s20, s0, 0x10050c -s_atomic_add s20, s0, 0x100510 -s_atomic_add s20, s0, 0x100514 -s_atomic_add s20, s0, 0x100518 -s_atomic_add s20, s0, 0x10051c -s_atomic_add s20, s0, 0x100520 -s_atomic_add s20, s0, 0x100524 -s_atomic_add s20, s0, 0x100528 -s_atomic_add s20, s0, 0x10052c - -s_atomic_add s20, s0, 0x100600 -s_atomic_add s20, s0, 0x100604 -s_atomic_add s20, s0, 0x100608 -s_atomic_add s20, s0, 0x10060c -s_atomic_add s20, s0, 0x100610 -s_atomic_add s20, s0, 0x100614 -s_atomic_add s20, s0, 0x100618 -s_atomic_add s20, s0, 0x10061c -s_atomic_add s20, s0, 0x100620 -s_atomic_add s20, s0, 0x100624 -s_atomic_add s20, s0, 0x100628 -s_atomic_add s20, s0, 0x10062c - -s_atomic_add s20, s0, 0x100700 -s_atomic_add s20, s0, 0x100704 -s_atomic_add s20, s0, 0x100708 -s_atomic_add s20, s0, 0x10070c -s_atomic_add s20, s0, 0x100710 -s_atomic_add s20, s0, 0x100714 -s_atomic_add s20, s0, 0x100718 -s_atomic_add s20, s0, 0x10071c -s_atomic_add s20, s0, 0x100720 -s_atomic_add s20, s0, 0x100724 -s_atomic_add s20, s0, 0x100728 -s_atomic_add s20, s0, 0x10072c - -s_atomic_add s20, s0, 0x100800 -s_atomic_add s20, s0, 0x100804 -s_atomic_add s20, s0, 0x100808 -s_atomic_add s20, s0, 0x10080c -s_atomic_add s20, s0, 0x100810 -s_atomic_add s20, s0, 0x100814 -s_atomic_add s20, s0, 0x100818 -s_atomic_add s20, s0, 0x10081c -s_atomic_add s20, s0, 0x100820 -s_atomic_add s20, s0, 0x100824 -s_atomic_add s20, s0, 0x100828 -s_atomic_add s20, s0, 0x10082c - -s_atomic_add s20, s0, 0x100900 -s_atomic_add s20, s0, 0x100904 -s_atomic_add s20, s0, 0x100908 -s_atomic_add s20, s0, 0x10090c -s_atomic_add s20, s0, 0x100910 -s_atomic_add s20, s0, 0x100914 -s_atomic_add s20, s0, 0x100918 -s_atomic_add s20, s0, 0x10091c -s_atomic_add s20, s0, 0x100920 -s_atomic_add s20, s0, 0x100924 -s_atomic_add s20, s0, 0x100928 -s_atomic_add s20, s0, 0x10092c - -s_atomic_add s20, s0, 0x100a00 -s_atomic_add s20, s0, 0x100a04 -s_atomic_add s20, s0, 0x100a08 -s_atomic_add s20, s0, 0x100a0c -s_atomic_add s20, s0, 0x100a10 -s_atomic_add s20, s0, 0x100a14 -s_atomic_add s20, s0, 0x100a18 -s_atomic_add s20, s0, 0x100a1c -s_atomic_add s20, s0, 0x100a20 -s_atomic_add s20, s0, 0x100a24 -s_atomic_add s20, s0, 0x100a28 -s_atomic_add s20, s0, 0x100a2c - -s_atomic_add s20, s0, 0x100b00 -s_atomic_add s20, s0, 0x100b04 -s_atomic_add s20, s0, 0x100b08 -s_atomic_add s20, s0, 0x100b0c -s_atomic_add s20, s0, 0x100b10 -s_atomic_add s20, s0, 0x100b14 -s_atomic_add s20, s0, 0x100b18 -s_atomic_add s20, s0, 0x100b1c -s_atomic_add s20, s0, 0x100b20 -s_atomic_add s20, s0, 0x100b24 -s_atomic_add s20, s0, 0x100b28 -s_atomic_add s20, s0, 0x100b2c - -s_atomic_add s20, s0, 0x100c00 -s_atomic_add s20, s0, 0x100c04 -s_atomic_add s20, s0, 0x100c08 -s_atomic_add s20, s0, 0x100c0c -s_atomic_add s20, s0, 0x100c10 -s_atomic_add s20, s0, 0x100c14 -s_atomic_add s20, s0, 0x100c18 -s_atomic_add s20, s0, 0x100c1c -s_atomic_add s20, s0, 0x100c20 -s_atomic_add s20, s0, 0x100c24 -s_atomic_add s20, s0, 0x100c28 -s_atomic_add s20, s0, 0x100c2c - - -s_sub_u32 s30, s30, 1 -s_cmpk_eq_u32 s30, 0 -s_cbranch_scc0 LOAD_LOOP - - -s_cmpk_ge_i32 s9, 0x2 -s_cbranch_scc1 END - -ATOMIC_LOOP: - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -s_sub_u32 s30, s30, 1 -s_cmpk_eq_u32 s30, 0 -s_cbranch_scc0 ATOMIC_LOOP - -//s_waitcnt 0 - -END: -s_waitcnt 0 - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_tc_atomic_04.sp3 b/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_tc_atomic_04.sp3 deleted file mode 100644 index c93b2992f0..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_tc_atomic_04.sp3 +++ /dev/null @@ -1,2816 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -//set bit 12 low to select EA0 -s_mov_b32 s32, 0xFFFFEFFF -s_and_b32 s24, s24, s32 - -s_and_b32 s31, s9, 0x1 -s_cmpk_eq_i32 s31, 0x1 -s_cbranch_scc1 ODD_WAVES - -//set bit 12 high to select EA1 -s_mov_b32 s32, 0x1000 -s_or_b32 s24, s24, s32 - -ODD_WAVES: - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -v_mul_i32_i24 v3, v3, 0x10 -v_mul_i32_i24 v9, v9, 0x10 - -s_mov_b32 s31, 0x9000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 - -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 - -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 - -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 - -s_barrier - -s_mov_b32 s31, 0xF0000 -s_mov_b32 s32, 0x9000 - -//store and load s8 times -s_mov_b32 s30, s8 - -s_cmpk_lt_i32 s9, 0x2 -s_cbranch_scc1 ATOMIC_LOOP - -s_mov_b32 s20, 0x1 - -LOAD_LOOP: - -s_atomic_add s20, s0, 0x100000 -s_atomic_add s20, s0, 0x100010 -s_atomic_add s20, s0, 0x100020 -s_atomic_add s20, s0, 0x100030 -s_atomic_add s20, s0, 0x100040 -s_atomic_add s20, s0, 0x100050 -s_atomic_add s20, s0, 0x100060 -s_atomic_add s20, s0, 0x100070 -s_atomic_add s20, s0, 0x100080 -s_atomic_add s20, s0, 0x100090 - -s_atomic_add s20, s0, 0x100100 -s_atomic_add s20, s0, 0x100110 -s_atomic_add s20, s0, 0x100120 -s_atomic_add s20, s0, 0x100130 -s_atomic_add s20, s0, 0x100140 -s_atomic_add s20, s0, 0x100150 -s_atomic_add s20, s0, 0x100160 -s_atomic_add s20, s0, 0x100170 -s_atomic_add s20, s0, 0x100180 -s_atomic_add s20, s0, 0x100190 - -s_atomic_add s20, s0, 0x100200 -s_atomic_add s20, s0, 0x100210 -s_atomic_add s20, s0, 0x100220 -s_atomic_add s20, s0, 0x100230 -s_atomic_add s20, s0, 0x100240 -s_atomic_add s20, s0, 0x100250 -s_atomic_add s20, s0, 0x100260 -s_atomic_add s20, s0, 0x100270 -s_atomic_add s20, s0, 0x100280 -s_atomic_add s20, s0, 0x100290 - -s_atomic_add s20, s0, 0x100300 -s_atomic_add s20, s0, 0x100310 -s_atomic_add s20, s0, 0x100320 -s_atomic_add s20, s0, 0x100330 -s_atomic_add s20, s0, 0x100340 -s_atomic_add s20, s0, 0x100350 -s_atomic_add s20, s0, 0x100360 -s_atomic_add s20, s0, 0x100370 -s_atomic_add s20, s0, 0x100380 -s_atomic_add s20, s0, 0x100390 - -s_atomic_add s20, s0, 0x100400 -s_atomic_add s20, s0, 0x100404 -s_atomic_add s20, s0, 0x100408 -s_atomic_add s20, s0, 0x10040c -s_atomic_add s20, s0, 0x100410 -s_atomic_add s20, s0, 0x100414 -s_atomic_add s20, s0, 0x100418 -s_atomic_add s20, s0, 0x10041c -s_atomic_add s20, s0, 0x100420 -s_atomic_add s20, s0, 0x100424 -s_atomic_add s20, s0, 0x100428 -s_atomic_add s20, s0, 0x10042c - -s_atomic_add s20, s0, 0x100500 -s_atomic_add s20, s0, 0x100504 -s_atomic_add s20, s0, 0x100508 -s_atomic_add s20, s0, 0x10050c -s_atomic_add s20, s0, 0x100510 -s_atomic_add s20, s0, 0x100514 -s_atomic_add s20, s0, 0x100518 -s_atomic_add s20, s0, 0x10051c -s_atomic_add s20, s0, 0x100520 -s_atomic_add s20, s0, 0x100524 -s_atomic_add s20, s0, 0x100528 -s_atomic_add s20, s0, 0x10052c - -s_atomic_add s20, s0, 0x100600 -s_atomic_add s20, s0, 0x100604 -s_atomic_add s20, s0, 0x100608 -s_atomic_add s20, s0, 0x10060c -s_atomic_add s20, s0, 0x100610 -s_atomic_add s20, s0, 0x100614 -s_atomic_add s20, s0, 0x100618 -s_atomic_add s20, s0, 0x10061c -s_atomic_add s20, s0, 0x100620 -s_atomic_add s20, s0, 0x100624 -s_atomic_add s20, s0, 0x100628 -s_atomic_add s20, s0, 0x10062c - -s_atomic_add s20, s0, 0x100700 -s_atomic_add s20, s0, 0x100704 -s_atomic_add s20, s0, 0x100708 -s_atomic_add s20, s0, 0x10070c -s_atomic_add s20, s0, 0x100710 -s_atomic_add s20, s0, 0x100714 -s_atomic_add s20, s0, 0x100718 -s_atomic_add s20, s0, 0x10071c -s_atomic_add s20, s0, 0x100720 -s_atomic_add s20, s0, 0x100724 -s_atomic_add s20, s0, 0x100728 -s_atomic_add s20, s0, 0x10072c - -s_atomic_add s20, s0, 0x100800 -s_atomic_add s20, s0, 0x100804 -s_atomic_add s20, s0, 0x100808 -s_atomic_add s20, s0, 0x10080c -s_atomic_add s20, s0, 0x100810 -s_atomic_add s20, s0, 0x100814 -s_atomic_add s20, s0, 0x100818 -s_atomic_add s20, s0, 0x10081c -s_atomic_add s20, s0, 0x100820 -s_atomic_add s20, s0, 0x100824 -s_atomic_add s20, s0, 0x100828 -s_atomic_add s20, s0, 0x10082c - -s_atomic_add s20, s0, 0x100900 -s_atomic_add s20, s0, 0x100904 -s_atomic_add s20, s0, 0x100908 -s_atomic_add s20, s0, 0x10090c -s_atomic_add s20, s0, 0x100910 -s_atomic_add s20, s0, 0x100914 -s_atomic_add s20, s0, 0x100918 -s_atomic_add s20, s0, 0x10091c -s_atomic_add s20, s0, 0x100920 -s_atomic_add s20, s0, 0x100924 -s_atomic_add s20, s0, 0x100928 -s_atomic_add s20, s0, 0x10092c - -s_atomic_add s20, s0, 0x100a00 -s_atomic_add s20, s0, 0x100a04 -s_atomic_add s20, s0, 0x100a08 -s_atomic_add s20, s0, 0x100a0c -s_atomic_add s20, s0, 0x100a10 -s_atomic_add s20, s0, 0x100a14 -s_atomic_add s20, s0, 0x100a18 -s_atomic_add s20, s0, 0x100a1c -s_atomic_add s20, s0, 0x100a20 -s_atomic_add s20, s0, 0x100a24 -s_atomic_add s20, s0, 0x100a28 -s_atomic_add s20, s0, 0x100a2c - -s_atomic_add s20, s0, 0x100b00 -s_atomic_add s20, s0, 0x100b04 -s_atomic_add s20, s0, 0x100b08 -s_atomic_add s20, s0, 0x100b0c -s_atomic_add s20, s0, 0x100b10 -s_atomic_add s20, s0, 0x100b14 -s_atomic_add s20, s0, 0x100b18 -s_atomic_add s20, s0, 0x100b1c -s_atomic_add s20, s0, 0x100b20 -s_atomic_add s20, s0, 0x100b24 -s_atomic_add s20, s0, 0x100b28 -s_atomic_add s20, s0, 0x100b2c - -s_atomic_add s20, s0, 0x100c00 -s_atomic_add s20, s0, 0x100c04 -s_atomic_add s20, s0, 0x100c08 -s_atomic_add s20, s0, 0x100c0c -s_atomic_add s20, s0, 0x100c10 -s_atomic_add s20, s0, 0x100c14 -s_atomic_add s20, s0, 0x100c18 -s_atomic_add s20, s0, 0x100c1c -s_atomic_add s20, s0, 0x100c20 -s_atomic_add s20, s0, 0x100c24 -s_atomic_add s20, s0, 0x100c28 -s_atomic_add s20, s0, 0x100c2c - -s_atomic_add s20, s0, 0x101000 -s_atomic_add s20, s0, 0x101010 -s_atomic_add s20, s0, 0x101020 -s_atomic_add s20, s0, 0x101030 -s_atomic_add s20, s0, 0x101040 -s_atomic_add s20, s0, 0x101050 -s_atomic_add s20, s0, 0x101060 -s_atomic_add s20, s0, 0x101070 -s_atomic_add s20, s0, 0x101080 -s_atomic_add s20, s0, 0x101090 - -s_atomic_add s20, s0, 0x101100 -s_atomic_add s20, s0, 0x101110 -s_atomic_add s20, s0, 0x101120 -s_atomic_add s20, s0, 0x101130 -s_atomic_add s20, s0, 0x101140 -s_atomic_add s20, s0, 0x101150 -s_atomic_add s20, s0, 0x101160 -s_atomic_add s20, s0, 0x101170 -s_atomic_add s20, s0, 0x101180 -s_atomic_add s20, s0, 0x101190 - -s_atomic_add s20, s0, 0x101200 -s_atomic_add s20, s0, 0x101210 -s_atomic_add s20, s0, 0x101220 -s_atomic_add s20, s0, 0x101230 -s_atomic_add s20, s0, 0x101240 -s_atomic_add s20, s0, 0x101250 -s_atomic_add s20, s0, 0x101260 -s_atomic_add s20, s0, 0x101270 -s_atomic_add s20, s0, 0x101280 -s_atomic_add s20, s0, 0x101290 - -s_atomic_add s20, s0, 0x101300 -s_atomic_add s20, s0, 0x101310 -s_atomic_add s20, s0, 0x101320 -s_atomic_add s20, s0, 0x101330 -s_atomic_add s20, s0, 0x101340 -s_atomic_add s20, s0, 0x101350 -s_atomic_add s20, s0, 0x101360 -s_atomic_add s20, s0, 0x101370 -s_atomic_add s20, s0, 0x101380 -s_atomic_add s20, s0, 0x101390 - -s_atomic_add s20, s0, 0x101400 -s_atomic_add s20, s0, 0x101404 -s_atomic_add s20, s0, 0x101408 -s_atomic_add s20, s0, 0x10140c -s_atomic_add s20, s0, 0x101410 -s_atomic_add s20, s0, 0x101414 -s_atomic_add s20, s0, 0x101418 -s_atomic_add s20, s0, 0x10141c -s_atomic_add s20, s0, 0x101420 -s_atomic_add s20, s0, 0x101424 -s_atomic_add s20, s0, 0x101428 -s_atomic_add s20, s0, 0x10142c - -s_atomic_add s20, s0, 0x101500 -s_atomic_add s20, s0, 0x101504 -s_atomic_add s20, s0, 0x101508 -s_atomic_add s20, s0, 0x10150c -s_atomic_add s20, s0, 0x101510 -s_atomic_add s20, s0, 0x101514 -s_atomic_add s20, s0, 0x101518 -s_atomic_add s20, s0, 0x10151c -s_atomic_add s20, s0, 0x101520 -s_atomic_add s20, s0, 0x101524 -s_atomic_add s20, s0, 0x101528 -s_atomic_add s20, s0, 0x10152c - -s_atomic_add s20, s0, 0x101600 -s_atomic_add s20, s0, 0x101604 -s_atomic_add s20, s0, 0x101608 -s_atomic_add s20, s0, 0x10160c -s_atomic_add s20, s0, 0x101610 -s_atomic_add s20, s0, 0x101614 -s_atomic_add s20, s0, 0x101618 -s_atomic_add s20, s0, 0x10161c -s_atomic_add s20, s0, 0x101620 -s_atomic_add s20, s0, 0x101624 -s_atomic_add s20, s0, 0x101628 -s_atomic_add s20, s0, 0x10162c - -s_atomic_add s20, s0, 0x101700 -s_atomic_add s20, s0, 0x101704 -s_atomic_add s20, s0, 0x101708 -s_atomic_add s20, s0, 0x10170c -s_atomic_add s20, s0, 0x101710 -s_atomic_add s20, s0, 0x101714 -s_atomic_add s20, s0, 0x101718 -s_atomic_add s20, s0, 0x10171c -s_atomic_add s20, s0, 0x101720 -s_atomic_add s20, s0, 0x101724 -s_atomic_add s20, s0, 0x101728 -s_atomic_add s20, s0, 0x10172c - -s_atomic_add s20, s0, 0x101800 -s_atomic_add s20, s0, 0x101804 -s_atomic_add s20, s0, 0x101808 -s_atomic_add s20, s0, 0x10180c -s_atomic_add s20, s0, 0x101810 -s_atomic_add s20, s0, 0x101814 -s_atomic_add s20, s0, 0x101818 -s_atomic_add s20, s0, 0x10181c -s_atomic_add s20, s0, 0x101820 -s_atomic_add s20, s0, 0x101824 -s_atomic_add s20, s0, 0x101828 -s_atomic_add s20, s0, 0x10182c - -s_atomic_add s20, s0, 0x101900 -s_atomic_add s20, s0, 0x101904 -s_atomic_add s20, s0, 0x101908 -s_atomic_add s20, s0, 0x10190c -s_atomic_add s20, s0, 0x101910 -s_atomic_add s20, s0, 0x101914 -s_atomic_add s20, s0, 0x101918 -s_atomic_add s20, s0, 0x10191c -s_atomic_add s20, s0, 0x101920 -s_atomic_add s20, s0, 0x101924 -s_atomic_add s20, s0, 0x101928 -s_atomic_add s20, s0, 0x10192c - -s_atomic_add s20, s0, 0x101a00 -s_atomic_add s20, s0, 0x101a04 -s_atomic_add s20, s0, 0x101a08 -s_atomic_add s20, s0, 0x101a0c -s_atomic_add s20, s0, 0x101a10 -s_atomic_add s20, s0, 0x101a14 -s_atomic_add s20, s0, 0x101a18 -s_atomic_add s20, s0, 0x101a1c -s_atomic_add s20, s0, 0x101a20 -s_atomic_add s20, s0, 0x101a24 -s_atomic_add s20, s0, 0x101a28 -s_atomic_add s20, s0, 0x101a2c - -s_atomic_add s20, s0, 0x101b00 -s_atomic_add s20, s0, 0x101b04 -s_atomic_add s20, s0, 0x101b08 -s_atomic_add s20, s0, 0x101b0c -s_atomic_add s20, s0, 0x101b10 -s_atomic_add s20, s0, 0x101b14 -s_atomic_add s20, s0, 0x101b18 -s_atomic_add s20, s0, 0x101b1c -s_atomic_add s20, s0, 0x101b20 -s_atomic_add s20, s0, 0x101b24 -s_atomic_add s20, s0, 0x101b28 -s_atomic_add s20, s0, 0x101b2c - -s_atomic_add s20, s0, 0x101c00 -s_atomic_add s20, s0, 0x101c04 -s_atomic_add s20, s0, 0x101c08 -s_atomic_add s20, s0, 0x101c0c -s_atomic_add s20, s0, 0x101c10 -s_atomic_add s20, s0, 0x101c14 -s_atomic_add s20, s0, 0x101c18 -s_atomic_add s20, s0, 0x101c1c -s_atomic_add s20, s0, 0x101c20 -s_atomic_add s20, s0, 0x101c24 -s_atomic_add s20, s0, 0x101c28 -s_atomic_add s20, s0, 0x101c2c - -s_atomic_add s20, s0, 0x100000 -s_atomic_add s20, s0, 0x100010 -s_atomic_add s20, s0, 0x100020 -s_atomic_add s20, s0, 0x100030 -s_atomic_add s20, s0, 0x100040 -s_atomic_add s20, s0, 0x100050 -s_atomic_add s20, s0, 0x100060 -s_atomic_add s20, s0, 0x100070 -s_atomic_add s20, s0, 0x100080 -s_atomic_add s20, s0, 0x100090 - -s_atomic_add s20, s0, 0x100100 -s_atomic_add s20, s0, 0x100110 -s_atomic_add s20, s0, 0x100120 -s_atomic_add s20, s0, 0x100130 -s_atomic_add s20, s0, 0x100140 -s_atomic_add s20, s0, 0x100150 -s_atomic_add s20, s0, 0x100160 -s_atomic_add s20, s0, 0x100170 -s_atomic_add s20, s0, 0x100180 -s_atomic_add s20, s0, 0x100190 - -s_atomic_add s20, s0, 0x100200 -s_atomic_add s20, s0, 0x100210 -s_atomic_add s20, s0, 0x100220 -s_atomic_add s20, s0, 0x100230 -s_atomic_add s20, s0, 0x100240 -s_atomic_add s20, s0, 0x100250 -s_atomic_add s20, s0, 0x100260 -s_atomic_add s20, s0, 0x100270 -s_atomic_add s20, s0, 0x100280 -s_atomic_add s20, s0, 0x100290 - -s_atomic_add s20, s0, 0x100300 -s_atomic_add s20, s0, 0x100310 -s_atomic_add s20, s0, 0x100320 -s_atomic_add s20, s0, 0x100330 -s_atomic_add s20, s0, 0x100340 -s_atomic_add s20, s0, 0x100350 -s_atomic_add s20, s0, 0x100360 -s_atomic_add s20, s0, 0x100370 -s_atomic_add s20, s0, 0x100380 -s_atomic_add s20, s0, 0x100390 - -s_atomic_add s20, s0, 0x100400 -s_atomic_add s20, s0, 0x100404 -s_atomic_add s20, s0, 0x100408 -s_atomic_add s20, s0, 0x10040c -s_atomic_add s20, s0, 0x100410 -s_atomic_add s20, s0, 0x100414 -s_atomic_add s20, s0, 0x100418 -s_atomic_add s20, s0, 0x10041c -s_atomic_add s20, s0, 0x100420 -s_atomic_add s20, s0, 0x100424 -s_atomic_add s20, s0, 0x100428 -s_atomic_add s20, s0, 0x10042c - -s_atomic_add s20, s0, 0x100500 -s_atomic_add s20, s0, 0x100504 -s_atomic_add s20, s0, 0x100508 -s_atomic_add s20, s0, 0x10050c -s_atomic_add s20, s0, 0x100510 -s_atomic_add s20, s0, 0x100514 -s_atomic_add s20, s0, 0x100518 -s_atomic_add s20, s0, 0x10051c -s_atomic_add s20, s0, 0x100520 -s_atomic_add s20, s0, 0x100524 -s_atomic_add s20, s0, 0x100528 -s_atomic_add s20, s0, 0x10052c - -s_atomic_add s20, s0, 0x100600 -s_atomic_add s20, s0, 0x100604 -s_atomic_add s20, s0, 0x100608 -s_atomic_add s20, s0, 0x10060c -s_atomic_add s20, s0, 0x100610 -s_atomic_add s20, s0, 0x100614 -s_atomic_add s20, s0, 0x100618 -s_atomic_add s20, s0, 0x10061c -s_atomic_add s20, s0, 0x100620 -s_atomic_add s20, s0, 0x100624 -s_atomic_add s20, s0, 0x100628 -s_atomic_add s20, s0, 0x10062c - -s_atomic_add s20, s0, 0x100700 -s_atomic_add s20, s0, 0x100704 -s_atomic_add s20, s0, 0x100708 -s_atomic_add s20, s0, 0x10070c -s_atomic_add s20, s0, 0x100710 -s_atomic_add s20, s0, 0x100714 -s_atomic_add s20, s0, 0x100718 -s_atomic_add s20, s0, 0x10071c -s_atomic_add s20, s0, 0x100720 -s_atomic_add s20, s0, 0x100724 -s_atomic_add s20, s0, 0x100728 -s_atomic_add s20, s0, 0x10072c - -s_atomic_add s20, s0, 0x100800 -s_atomic_add s20, s0, 0x100804 -s_atomic_add s20, s0, 0x100808 -s_atomic_add s20, s0, 0x10080c -s_atomic_add s20, s0, 0x100810 -s_atomic_add s20, s0, 0x100814 -s_atomic_add s20, s0, 0x100818 -s_atomic_add s20, s0, 0x10081c -s_atomic_add s20, s0, 0x100820 -s_atomic_add s20, s0, 0x100824 -s_atomic_add s20, s0, 0x100828 -s_atomic_add s20, s0, 0x10082c - -s_atomic_add s20, s0, 0x100900 -s_atomic_add s20, s0, 0x100904 -s_atomic_add s20, s0, 0x100908 -s_atomic_add s20, s0, 0x10090c -s_atomic_add s20, s0, 0x100910 -s_atomic_add s20, s0, 0x100914 -s_atomic_add s20, s0, 0x100918 -s_atomic_add s20, s0, 0x10091c -s_atomic_add s20, s0, 0x100920 -s_atomic_add s20, s0, 0x100924 -s_atomic_add s20, s0, 0x100928 -s_atomic_add s20, s0, 0x10092c - -s_atomic_add s20, s0, 0x100a00 -s_atomic_add s20, s0, 0x100a04 -s_atomic_add s20, s0, 0x100a08 -s_atomic_add s20, s0, 0x100a0c -s_atomic_add s20, s0, 0x100a10 -s_atomic_add s20, s0, 0x100a14 -s_atomic_add s20, s0, 0x100a18 -s_atomic_add s20, s0, 0x100a1c -s_atomic_add s20, s0, 0x100a20 -s_atomic_add s20, s0, 0x100a24 -s_atomic_add s20, s0, 0x100a28 -s_atomic_add s20, s0, 0x100a2c - -s_atomic_add s20, s0, 0x100b00 -s_atomic_add s20, s0, 0x100b04 -s_atomic_add s20, s0, 0x100b08 -s_atomic_add s20, s0, 0x100b0c -s_atomic_add s20, s0, 0x100b10 -s_atomic_add s20, s0, 0x100b14 -s_atomic_add s20, s0, 0x100b18 -s_atomic_add s20, s0, 0x100b1c -s_atomic_add s20, s0, 0x100b20 -s_atomic_add s20, s0, 0x100b24 -s_atomic_add s20, s0, 0x100b28 -s_atomic_add s20, s0, 0x100b2c - -s_atomic_add s20, s0, 0x100c00 -s_atomic_add s20, s0, 0x100c04 -s_atomic_add s20, s0, 0x100c08 -s_atomic_add s20, s0, 0x100c0c -s_atomic_add s20, s0, 0x100c10 -s_atomic_add s20, s0, 0x100c14 -s_atomic_add s20, s0, 0x100c18 -s_atomic_add s20, s0, 0x100c1c -s_atomic_add s20, s0, 0x100c20 -s_atomic_add s20, s0, 0x100c24 -s_atomic_add s20, s0, 0x100c28 -s_atomic_add s20, s0, 0x100c2c - -s_atomic_add s20, s0, 0x101000 -s_atomic_add s20, s0, 0x101010 -s_atomic_add s20, s0, 0x101020 -s_atomic_add s20, s0, 0x101030 -s_atomic_add s20, s0, 0x101040 -s_atomic_add s20, s0, 0x101050 -s_atomic_add s20, s0, 0x101060 -s_atomic_add s20, s0, 0x101070 -s_atomic_add s20, s0, 0x101080 -s_atomic_add s20, s0, 0x101090 - -s_atomic_add s20, s0, 0x101100 -s_atomic_add s20, s0, 0x101110 -s_atomic_add s20, s0, 0x101120 -s_atomic_add s20, s0, 0x101130 -s_atomic_add s20, s0, 0x101140 -s_atomic_add s20, s0, 0x101150 -s_atomic_add s20, s0, 0x101160 -s_atomic_add s20, s0, 0x101170 -s_atomic_add s20, s0, 0x101180 -s_atomic_add s20, s0, 0x101190 - -s_atomic_add s20, s0, 0x101200 -s_atomic_add s20, s0, 0x101210 -s_atomic_add s20, s0, 0x101220 -s_atomic_add s20, s0, 0x101230 -s_atomic_add s20, s0, 0x101240 -s_atomic_add s20, s0, 0x101250 -s_atomic_add s20, s0, 0x101260 -s_atomic_add s20, s0, 0x101270 -s_atomic_add s20, s0, 0x101280 -s_atomic_add s20, s0, 0x101290 - -s_atomic_add s20, s0, 0x101300 -s_atomic_add s20, s0, 0x101310 -s_atomic_add s20, s0, 0x101320 -s_atomic_add s20, s0, 0x101330 -s_atomic_add s20, s0, 0x101340 -s_atomic_add s20, s0, 0x101350 -s_atomic_add s20, s0, 0x101360 -s_atomic_add s20, s0, 0x101370 -s_atomic_add s20, s0, 0x101380 -s_atomic_add s20, s0, 0x101390 - -s_atomic_add s20, s0, 0x101400 -s_atomic_add s20, s0, 0x101404 -s_atomic_add s20, s0, 0x101408 -s_atomic_add s20, s0, 0x10140c -s_atomic_add s20, s0, 0x101410 -s_atomic_add s20, s0, 0x101414 -s_atomic_add s20, s0, 0x101418 -s_atomic_add s20, s0, 0x10141c -s_atomic_add s20, s0, 0x101420 -s_atomic_add s20, s0, 0x101424 -s_atomic_add s20, s0, 0x101428 -s_atomic_add s20, s0, 0x10142c - -s_atomic_add s20, s0, 0x101500 -s_atomic_add s20, s0, 0x101504 -s_atomic_add s20, s0, 0x101508 -s_atomic_add s20, s0, 0x10150c -s_atomic_add s20, s0, 0x101510 -s_atomic_add s20, s0, 0x101514 -s_atomic_add s20, s0, 0x101518 -s_atomic_add s20, s0, 0x10151c -s_atomic_add s20, s0, 0x101520 -s_atomic_add s20, s0, 0x101524 -s_atomic_add s20, s0, 0x101528 -s_atomic_add s20, s0, 0x10152c - -s_atomic_add s20, s0, 0x101600 -s_atomic_add s20, s0, 0x101604 -s_atomic_add s20, s0, 0x101608 -s_atomic_add s20, s0, 0x10160c -s_atomic_add s20, s0, 0x101610 -s_atomic_add s20, s0, 0x101614 -s_atomic_add s20, s0, 0x101618 -s_atomic_add s20, s0, 0x10161c -s_atomic_add s20, s0, 0x101620 -s_atomic_add s20, s0, 0x101624 -s_atomic_add s20, s0, 0x101628 -s_atomic_add s20, s0, 0x10162c - -s_atomic_add s20, s0, 0x101700 -s_atomic_add s20, s0, 0x101704 -s_atomic_add s20, s0, 0x101708 -s_atomic_add s20, s0, 0x10170c -s_atomic_add s20, s0, 0x101710 -s_atomic_add s20, s0, 0x101714 -s_atomic_add s20, s0, 0x101718 -s_atomic_add s20, s0, 0x10171c -s_atomic_add s20, s0, 0x101720 -s_atomic_add s20, s0, 0x101724 -s_atomic_add s20, s0, 0x101728 -s_atomic_add s20, s0, 0x10172c - -s_atomic_add s20, s0, 0x101800 -s_atomic_add s20, s0, 0x101804 -s_atomic_add s20, s0, 0x101808 -s_atomic_add s20, s0, 0x10180c -s_atomic_add s20, s0, 0x101810 -s_atomic_add s20, s0, 0x101814 -s_atomic_add s20, s0, 0x101818 -s_atomic_add s20, s0, 0x10181c -s_atomic_add s20, s0, 0x101820 -s_atomic_add s20, s0, 0x101824 -s_atomic_add s20, s0, 0x101828 -s_atomic_add s20, s0, 0x10182c - -s_atomic_add s20, s0, 0x101900 -s_atomic_add s20, s0, 0x101904 -s_atomic_add s20, s0, 0x101908 -s_atomic_add s20, s0, 0x10190c -s_atomic_add s20, s0, 0x101910 -s_atomic_add s20, s0, 0x101914 -s_atomic_add s20, s0, 0x101918 -s_atomic_add s20, s0, 0x10191c -s_atomic_add s20, s0, 0x101920 -s_atomic_add s20, s0, 0x101924 -s_atomic_add s20, s0, 0x101928 -s_atomic_add s20, s0, 0x10192c - -s_atomic_add s20, s0, 0x101a00 -s_atomic_add s20, s0, 0x101a04 -s_atomic_add s20, s0, 0x101a08 -s_atomic_add s20, s0, 0x101a0c -s_atomic_add s20, s0, 0x101a10 -s_atomic_add s20, s0, 0x101a14 -s_atomic_add s20, s0, 0x101a18 -s_atomic_add s20, s0, 0x101a1c -s_atomic_add s20, s0, 0x101a20 -s_atomic_add s20, s0, 0x101a24 -s_atomic_add s20, s0, 0x101a28 -s_atomic_add s20, s0, 0x101a2c - -s_atomic_add s20, s0, 0x101b00 -s_atomic_add s20, s0, 0x101b04 -s_atomic_add s20, s0, 0x101b08 -s_atomic_add s20, s0, 0x101b0c -s_atomic_add s20, s0, 0x101b10 -s_atomic_add s20, s0, 0x101b14 -s_atomic_add s20, s0, 0x101b18 -s_atomic_add s20, s0, 0x101b1c -s_atomic_add s20, s0, 0x101b20 -s_atomic_add s20, s0, 0x101b24 -s_atomic_add s20, s0, 0x101b28 -s_atomic_add s20, s0, 0x101b2c - -s_atomic_add s20, s0, 0x101c00 -s_atomic_add s20, s0, 0x101c04 -s_atomic_add s20, s0, 0x101c08 -s_atomic_add s20, s0, 0x101c0c -s_atomic_add s20, s0, 0x101c10 -s_atomic_add s20, s0, 0x101c14 -s_atomic_add s20, s0, 0x101c18 -s_atomic_add s20, s0, 0x101c1c -s_atomic_add s20, s0, 0x101c20 -s_atomic_add s20, s0, 0x101c24 -s_atomic_add s20, s0, 0x101c28 -s_atomic_add s20, s0, 0x101c2c - -s_atomic_add s20, s0, 0x100000 -s_atomic_add s20, s0, 0x100010 -s_atomic_add s20, s0, 0x100020 -s_atomic_add s20, s0, 0x100030 -s_atomic_add s20, s0, 0x100040 -s_atomic_add s20, s0, 0x100050 -s_atomic_add s20, s0, 0x100060 -s_atomic_add s20, s0, 0x100070 -s_atomic_add s20, s0, 0x100080 -s_atomic_add s20, s0, 0x100090 - -s_atomic_add s20, s0, 0x100100 -s_atomic_add s20, s0, 0x100110 -s_atomic_add s20, s0, 0x100120 -s_atomic_add s20, s0, 0x100130 -s_atomic_add s20, s0, 0x100140 -s_atomic_add s20, s0, 0x100150 -s_atomic_add s20, s0, 0x100160 -s_atomic_add s20, s0, 0x100170 -s_atomic_add s20, s0, 0x100180 -s_atomic_add s20, s0, 0x100190 - -s_atomic_add s20, s0, 0x100200 -s_atomic_add s20, s0, 0x100210 -s_atomic_add s20, s0, 0x100220 -s_atomic_add s20, s0, 0x100230 -s_atomic_add s20, s0, 0x100240 -s_atomic_add s20, s0, 0x100250 -s_atomic_add s20, s0, 0x100260 -s_atomic_add s20, s0, 0x100270 -s_atomic_add s20, s0, 0x100280 -s_atomic_add s20, s0, 0x100290 - -s_atomic_add s20, s0, 0x100300 -s_atomic_add s20, s0, 0x100310 -s_atomic_add s20, s0, 0x100320 -s_atomic_add s20, s0, 0x100330 -s_atomic_add s20, s0, 0x100340 -s_atomic_add s20, s0, 0x100350 -s_atomic_add s20, s0, 0x100360 -s_atomic_add s20, s0, 0x100370 -s_atomic_add s20, s0, 0x100380 -s_atomic_add s20, s0, 0x100390 - -s_atomic_add s20, s0, 0x100400 -s_atomic_add s20, s0, 0x100404 -s_atomic_add s20, s0, 0x100408 -s_atomic_add s20, s0, 0x10040c -s_atomic_add s20, s0, 0x100410 -s_atomic_add s20, s0, 0x100414 -s_atomic_add s20, s0, 0x100418 -s_atomic_add s20, s0, 0x10041c -s_atomic_add s20, s0, 0x100420 -s_atomic_add s20, s0, 0x100424 -s_atomic_add s20, s0, 0x100428 -s_atomic_add s20, s0, 0x10042c - -s_atomic_add s20, s0, 0x100500 -s_atomic_add s20, s0, 0x100504 -s_atomic_add s20, s0, 0x100508 -s_atomic_add s20, s0, 0x10050c -s_atomic_add s20, s0, 0x100510 -s_atomic_add s20, s0, 0x100514 -s_atomic_add s20, s0, 0x100518 -s_atomic_add s20, s0, 0x10051c -s_atomic_add s20, s0, 0x100520 -s_atomic_add s20, s0, 0x100524 -s_atomic_add s20, s0, 0x100528 -s_atomic_add s20, s0, 0x10052c - -s_atomic_add s20, s0, 0x100600 -s_atomic_add s20, s0, 0x100604 -s_atomic_add s20, s0, 0x100608 -s_atomic_add s20, s0, 0x10060c -s_atomic_add s20, s0, 0x100610 -s_atomic_add s20, s0, 0x100614 -s_atomic_add s20, s0, 0x100618 -s_atomic_add s20, s0, 0x10061c -s_atomic_add s20, s0, 0x100620 -s_atomic_add s20, s0, 0x100624 -s_atomic_add s20, s0, 0x100628 -s_atomic_add s20, s0, 0x10062c - -s_atomic_add s20, s0, 0x100700 -s_atomic_add s20, s0, 0x100704 -s_atomic_add s20, s0, 0x100708 -s_atomic_add s20, s0, 0x10070c -s_atomic_add s20, s0, 0x100710 -s_atomic_add s20, s0, 0x100714 -s_atomic_add s20, s0, 0x100718 -s_atomic_add s20, s0, 0x10071c -s_atomic_add s20, s0, 0x100720 -s_atomic_add s20, s0, 0x100724 -s_atomic_add s20, s0, 0x100728 -s_atomic_add s20, s0, 0x10072c - -s_atomic_add s20, s0, 0x100800 -s_atomic_add s20, s0, 0x100804 -s_atomic_add s20, s0, 0x100808 -s_atomic_add s20, s0, 0x10080c -s_atomic_add s20, s0, 0x100810 -s_atomic_add s20, s0, 0x100814 -s_atomic_add s20, s0, 0x100818 -s_atomic_add s20, s0, 0x10081c -s_atomic_add s20, s0, 0x100820 -s_atomic_add s20, s0, 0x100824 -s_atomic_add s20, s0, 0x100828 -s_atomic_add s20, s0, 0x10082c - -s_atomic_add s20, s0, 0x100900 -s_atomic_add s20, s0, 0x100904 -s_atomic_add s20, s0, 0x100908 -s_atomic_add s20, s0, 0x10090c -s_atomic_add s20, s0, 0x100910 -s_atomic_add s20, s0, 0x100914 -s_atomic_add s20, s0, 0x100918 -s_atomic_add s20, s0, 0x10091c -s_atomic_add s20, s0, 0x100920 -s_atomic_add s20, s0, 0x100924 -s_atomic_add s20, s0, 0x100928 -s_atomic_add s20, s0, 0x10092c - -s_atomic_add s20, s0, 0x100a00 -s_atomic_add s20, s0, 0x100a04 -s_atomic_add s20, s0, 0x100a08 -s_atomic_add s20, s0, 0x100a0c -s_atomic_add s20, s0, 0x100a10 -s_atomic_add s20, s0, 0x100a14 -s_atomic_add s20, s0, 0x100a18 -s_atomic_add s20, s0, 0x100a1c -s_atomic_add s20, s0, 0x100a20 -s_atomic_add s20, s0, 0x100a24 -s_atomic_add s20, s0, 0x100a28 -s_atomic_add s20, s0, 0x100a2c - -s_atomic_add s20, s0, 0x100b00 -s_atomic_add s20, s0, 0x100b04 -s_atomic_add s20, s0, 0x100b08 -s_atomic_add s20, s0, 0x100b0c -s_atomic_add s20, s0, 0x100b10 -s_atomic_add s20, s0, 0x100b14 -s_atomic_add s20, s0, 0x100b18 -s_atomic_add s20, s0, 0x100b1c -s_atomic_add s20, s0, 0x100b20 -s_atomic_add s20, s0, 0x100b24 -s_atomic_add s20, s0, 0x100b28 -s_atomic_add s20, s0, 0x100b2c - -s_atomic_add s20, s0, 0x100c00 -s_atomic_add s20, s0, 0x100c04 -s_atomic_add s20, s0, 0x100c08 -s_atomic_add s20, s0, 0x100c0c -s_atomic_add s20, s0, 0x100c10 -s_atomic_add s20, s0, 0x100c14 -s_atomic_add s20, s0, 0x100c18 -s_atomic_add s20, s0, 0x100c1c -s_atomic_add s20, s0, 0x100c20 -s_atomic_add s20, s0, 0x100c24 -s_atomic_add s20, s0, 0x100c28 -s_atomic_add s20, s0, 0x100c2c - -s_atomic_add s20, s0, 0x101000 -s_atomic_add s20, s0, 0x101010 -s_atomic_add s20, s0, 0x101020 -s_atomic_add s20, s0, 0x101030 -s_atomic_add s20, s0, 0x101040 -s_atomic_add s20, s0, 0x101050 -s_atomic_add s20, s0, 0x101060 -s_atomic_add s20, s0, 0x101070 -s_atomic_add s20, s0, 0x101080 -s_atomic_add s20, s0, 0x101090 - -s_atomic_add s20, s0, 0x101100 -s_atomic_add s20, s0, 0x101110 -s_atomic_add s20, s0, 0x101120 -s_atomic_add s20, s0, 0x101130 -s_atomic_add s20, s0, 0x101140 -s_atomic_add s20, s0, 0x101150 -s_atomic_add s20, s0, 0x101160 -s_atomic_add s20, s0, 0x101170 -s_atomic_add s20, s0, 0x101180 -s_atomic_add s20, s0, 0x101190 - -s_atomic_add s20, s0, 0x101200 -s_atomic_add s20, s0, 0x101210 -s_atomic_add s20, s0, 0x101220 -s_atomic_add s20, s0, 0x101230 -s_atomic_add s20, s0, 0x101240 -s_atomic_add s20, s0, 0x101250 -s_atomic_add s20, s0, 0x101260 -s_atomic_add s20, s0, 0x101270 -s_atomic_add s20, s0, 0x101280 -s_atomic_add s20, s0, 0x101290 - -s_atomic_add s20, s0, 0x101300 -s_atomic_add s20, s0, 0x101310 -s_atomic_add s20, s0, 0x101320 -s_atomic_add s20, s0, 0x101330 -s_atomic_add s20, s0, 0x101340 -s_atomic_add s20, s0, 0x101350 -s_atomic_add s20, s0, 0x101360 -s_atomic_add s20, s0, 0x101370 -s_atomic_add s20, s0, 0x101380 -s_atomic_add s20, s0, 0x101390 - -s_atomic_add s20, s0, 0x101400 -s_atomic_add s20, s0, 0x101404 -s_atomic_add s20, s0, 0x101408 -s_atomic_add s20, s0, 0x10140c -s_atomic_add s20, s0, 0x101410 -s_atomic_add s20, s0, 0x101414 -s_atomic_add s20, s0, 0x101418 -s_atomic_add s20, s0, 0x10141c -s_atomic_add s20, s0, 0x101420 -s_atomic_add s20, s0, 0x101424 -s_atomic_add s20, s0, 0x101428 -s_atomic_add s20, s0, 0x10142c - -s_atomic_add s20, s0, 0x101500 -s_atomic_add s20, s0, 0x101504 -s_atomic_add s20, s0, 0x101508 -s_atomic_add s20, s0, 0x10150c -s_atomic_add s20, s0, 0x101510 -s_atomic_add s20, s0, 0x101514 -s_atomic_add s20, s0, 0x101518 -s_atomic_add s20, s0, 0x10151c -s_atomic_add s20, s0, 0x101520 -s_atomic_add s20, s0, 0x101524 -s_atomic_add s20, s0, 0x101528 -s_atomic_add s20, s0, 0x10152c - -s_atomic_add s20, s0, 0x101600 -s_atomic_add s20, s0, 0x101604 -s_atomic_add s20, s0, 0x101608 -s_atomic_add s20, s0, 0x10160c -s_atomic_add s20, s0, 0x101610 -s_atomic_add s20, s0, 0x101614 -s_atomic_add s20, s0, 0x101618 -s_atomic_add s20, s0, 0x10161c -s_atomic_add s20, s0, 0x101620 -s_atomic_add s20, s0, 0x101624 -s_atomic_add s20, s0, 0x101628 -s_atomic_add s20, s0, 0x10162c - -s_atomic_add s20, s0, 0x101700 -s_atomic_add s20, s0, 0x101704 -s_atomic_add s20, s0, 0x101708 -s_atomic_add s20, s0, 0x10170c -s_atomic_add s20, s0, 0x101710 -s_atomic_add s20, s0, 0x101714 -s_atomic_add s20, s0, 0x101718 -s_atomic_add s20, s0, 0x10171c -s_atomic_add s20, s0, 0x101720 -s_atomic_add s20, s0, 0x101724 -s_atomic_add s20, s0, 0x101728 -s_atomic_add s20, s0, 0x10172c - -s_atomic_add s20, s0, 0x101800 -s_atomic_add s20, s0, 0x101804 -s_atomic_add s20, s0, 0x101808 -s_atomic_add s20, s0, 0x10180c -s_atomic_add s20, s0, 0x101810 -s_atomic_add s20, s0, 0x101814 -s_atomic_add s20, s0, 0x101818 -s_atomic_add s20, s0, 0x10181c -s_atomic_add s20, s0, 0x101820 -s_atomic_add s20, s0, 0x101824 -s_atomic_add s20, s0, 0x101828 -s_atomic_add s20, s0, 0x10182c - -s_atomic_add s20, s0, 0x101900 -s_atomic_add s20, s0, 0x101904 -s_atomic_add s20, s0, 0x101908 -s_atomic_add s20, s0, 0x10190c -s_atomic_add s20, s0, 0x101910 -s_atomic_add s20, s0, 0x101914 -s_atomic_add s20, s0, 0x101918 -s_atomic_add s20, s0, 0x10191c -s_atomic_add s20, s0, 0x101920 -s_atomic_add s20, s0, 0x101924 -s_atomic_add s20, s0, 0x101928 -s_atomic_add s20, s0, 0x10192c - -s_atomic_add s20, s0, 0x101a00 -s_atomic_add s20, s0, 0x101a04 -s_atomic_add s20, s0, 0x101a08 -s_atomic_add s20, s0, 0x101a0c -s_atomic_add s20, s0, 0x101a10 -s_atomic_add s20, s0, 0x101a14 -s_atomic_add s20, s0, 0x101a18 -s_atomic_add s20, s0, 0x101a1c -s_atomic_add s20, s0, 0x101a20 -s_atomic_add s20, s0, 0x101a24 -s_atomic_add s20, s0, 0x101a28 -s_atomic_add s20, s0, 0x101a2c - -s_atomic_add s20, s0, 0x101b00 -s_atomic_add s20, s0, 0x101b04 -s_atomic_add s20, s0, 0x101b08 -s_atomic_add s20, s0, 0x101b0c -s_atomic_add s20, s0, 0x101b10 -s_atomic_add s20, s0, 0x101b14 -s_atomic_add s20, s0, 0x101b18 -s_atomic_add s20, s0, 0x101b1c -s_atomic_add s20, s0, 0x101b20 -s_atomic_add s20, s0, 0x101b24 -s_atomic_add s20, s0, 0x101b28 -s_atomic_add s20, s0, 0x101b2c - -s_atomic_add s20, s0, 0x101c00 -s_atomic_add s20, s0, 0x101c04 -s_atomic_add s20, s0, 0x101c08 -s_atomic_add s20, s0, 0x101c0c -s_atomic_add s20, s0, 0x101c10 -s_atomic_add s20, s0, 0x101c14 -s_atomic_add s20, s0, 0x101c18 -s_atomic_add s20, s0, 0x101c1c -s_atomic_add s20, s0, 0x101c20 -s_atomic_add s20, s0, 0x101c24 -s_atomic_add s20, s0, 0x101c28 -s_atomic_add s20, s0, 0x101c2c - -s_atomic_add s20, s0, 0x100000 -s_atomic_add s20, s0, 0x100010 -s_atomic_add s20, s0, 0x100020 -s_atomic_add s20, s0, 0x100030 -s_atomic_add s20, s0, 0x100040 -s_atomic_add s20, s0, 0x100050 -s_atomic_add s20, s0, 0x100060 -s_atomic_add s20, s0, 0x100070 -s_atomic_add s20, s0, 0x100080 -s_atomic_add s20, s0, 0x100090 - -s_atomic_add s20, s0, 0x100100 -s_atomic_add s20, s0, 0x100110 -s_atomic_add s20, s0, 0x100120 -s_atomic_add s20, s0, 0x100130 -s_atomic_add s20, s0, 0x100140 -s_atomic_add s20, s0, 0x100150 -s_atomic_add s20, s0, 0x100160 -s_atomic_add s20, s0, 0x100170 -s_atomic_add s20, s0, 0x100180 -s_atomic_add s20, s0, 0x100190 - -s_atomic_add s20, s0, 0x100200 -s_atomic_add s20, s0, 0x100210 -s_atomic_add s20, s0, 0x100220 -s_atomic_add s20, s0, 0x100230 -s_atomic_add s20, s0, 0x100240 -s_atomic_add s20, s0, 0x100250 -s_atomic_add s20, s0, 0x100260 -s_atomic_add s20, s0, 0x100270 -s_atomic_add s20, s0, 0x100280 -s_atomic_add s20, s0, 0x100290 - -s_atomic_add s20, s0, 0x100300 -s_atomic_add s20, s0, 0x100310 -s_atomic_add s20, s0, 0x100320 -s_atomic_add s20, s0, 0x100330 -s_atomic_add s20, s0, 0x100340 -s_atomic_add s20, s0, 0x100350 -s_atomic_add s20, s0, 0x100360 -s_atomic_add s20, s0, 0x100370 -s_atomic_add s20, s0, 0x100380 -s_atomic_add s20, s0, 0x100390 - -s_atomic_add s20, s0, 0x100400 -s_atomic_add s20, s0, 0x100404 -s_atomic_add s20, s0, 0x100408 -s_atomic_add s20, s0, 0x10040c -s_atomic_add s20, s0, 0x100410 -s_atomic_add s20, s0, 0x100414 -s_atomic_add s20, s0, 0x100418 -s_atomic_add s20, s0, 0x10041c -s_atomic_add s20, s0, 0x100420 -s_atomic_add s20, s0, 0x100424 -s_atomic_add s20, s0, 0x100428 -s_atomic_add s20, s0, 0x10042c - -s_atomic_add s20, s0, 0x100500 -s_atomic_add s20, s0, 0x100504 -s_atomic_add s20, s0, 0x100508 -s_atomic_add s20, s0, 0x10050c -s_atomic_add s20, s0, 0x100510 -s_atomic_add s20, s0, 0x100514 -s_atomic_add s20, s0, 0x100518 -s_atomic_add s20, s0, 0x10051c -s_atomic_add s20, s0, 0x100520 -s_atomic_add s20, s0, 0x100524 -s_atomic_add s20, s0, 0x100528 -s_atomic_add s20, s0, 0x10052c - -s_atomic_add s20, s0, 0x100600 -s_atomic_add s20, s0, 0x100604 -s_atomic_add s20, s0, 0x100608 -s_atomic_add s20, s0, 0x10060c -s_atomic_add s20, s0, 0x100610 -s_atomic_add s20, s0, 0x100614 -s_atomic_add s20, s0, 0x100618 -s_atomic_add s20, s0, 0x10061c -s_atomic_add s20, s0, 0x100620 -s_atomic_add s20, s0, 0x100624 -s_atomic_add s20, s0, 0x100628 -s_atomic_add s20, s0, 0x10062c - -s_atomic_add s20, s0, 0x100700 -s_atomic_add s20, s0, 0x100704 -s_atomic_add s20, s0, 0x100708 -s_atomic_add s20, s0, 0x10070c -s_atomic_add s20, s0, 0x100710 -s_atomic_add s20, s0, 0x100714 -s_atomic_add s20, s0, 0x100718 -s_atomic_add s20, s0, 0x10071c -s_atomic_add s20, s0, 0x100720 -s_atomic_add s20, s0, 0x100724 -s_atomic_add s20, s0, 0x100728 -s_atomic_add s20, s0, 0x10072c - -s_atomic_add s20, s0, 0x100800 -s_atomic_add s20, s0, 0x100804 -s_atomic_add s20, s0, 0x100808 -s_atomic_add s20, s0, 0x10080c -s_atomic_add s20, s0, 0x100810 -s_atomic_add s20, s0, 0x100814 -s_atomic_add s20, s0, 0x100818 -s_atomic_add s20, s0, 0x10081c -s_atomic_add s20, s0, 0x100820 -s_atomic_add s20, s0, 0x100824 -s_atomic_add s20, s0, 0x100828 -s_atomic_add s20, s0, 0x10082c - -s_atomic_add s20, s0, 0x100900 -s_atomic_add s20, s0, 0x100904 -s_atomic_add s20, s0, 0x100908 -s_atomic_add s20, s0, 0x10090c -s_atomic_add s20, s0, 0x100910 -s_atomic_add s20, s0, 0x100914 -s_atomic_add s20, s0, 0x100918 -s_atomic_add s20, s0, 0x10091c -s_atomic_add s20, s0, 0x100920 -s_atomic_add s20, s0, 0x100924 -s_atomic_add s20, s0, 0x100928 -s_atomic_add s20, s0, 0x10092c - -s_atomic_add s20, s0, 0x100a00 -s_atomic_add s20, s0, 0x100a04 -s_atomic_add s20, s0, 0x100a08 -s_atomic_add s20, s0, 0x100a0c -s_atomic_add s20, s0, 0x100a10 -s_atomic_add s20, s0, 0x100a14 -s_atomic_add s20, s0, 0x100a18 -s_atomic_add s20, s0, 0x100a1c -s_atomic_add s20, s0, 0x100a20 -s_atomic_add s20, s0, 0x100a24 -s_atomic_add s20, s0, 0x100a28 -s_atomic_add s20, s0, 0x100a2c - -s_atomic_add s20, s0, 0x100b00 -s_atomic_add s20, s0, 0x100b04 -s_atomic_add s20, s0, 0x100b08 -s_atomic_add s20, s0, 0x100b0c -s_atomic_add s20, s0, 0x100b10 -s_atomic_add s20, s0, 0x100b14 -s_atomic_add s20, s0, 0x100b18 -s_atomic_add s20, s0, 0x100b1c -s_atomic_add s20, s0, 0x100b20 -s_atomic_add s20, s0, 0x100b24 -s_atomic_add s20, s0, 0x100b28 -s_atomic_add s20, s0, 0x100b2c - -s_atomic_add s20, s0, 0x100c00 -s_atomic_add s20, s0, 0x100c04 -s_atomic_add s20, s0, 0x100c08 -s_atomic_add s20, s0, 0x100c0c -s_atomic_add s20, s0, 0x100c10 -s_atomic_add s20, s0, 0x100c14 -s_atomic_add s20, s0, 0x100c18 -s_atomic_add s20, s0, 0x100c1c -s_atomic_add s20, s0, 0x100c20 -s_atomic_add s20, s0, 0x100c24 -s_atomic_add s20, s0, 0x100c28 -s_atomic_add s20, s0, 0x100c2c - -s_atomic_add s20, s0, 0x101000 -s_atomic_add s20, s0, 0x101010 -s_atomic_add s20, s0, 0x101020 -s_atomic_add s20, s0, 0x101030 -s_atomic_add s20, s0, 0x101040 -s_atomic_add s20, s0, 0x101050 -s_atomic_add s20, s0, 0x101060 -s_atomic_add s20, s0, 0x101070 -s_atomic_add s20, s0, 0x101080 -s_atomic_add s20, s0, 0x101090 - -s_atomic_add s20, s0, 0x101100 -s_atomic_add s20, s0, 0x101110 -s_atomic_add s20, s0, 0x101120 -s_atomic_add s20, s0, 0x101130 -s_atomic_add s20, s0, 0x101140 -s_atomic_add s20, s0, 0x101150 -s_atomic_add s20, s0, 0x101160 -s_atomic_add s20, s0, 0x101170 -s_atomic_add s20, s0, 0x101180 -s_atomic_add s20, s0, 0x101190 - -s_atomic_add s20, s0, 0x101200 -s_atomic_add s20, s0, 0x101210 -s_atomic_add s20, s0, 0x101220 -s_atomic_add s20, s0, 0x101230 -s_atomic_add s20, s0, 0x101240 -s_atomic_add s20, s0, 0x101250 -s_atomic_add s20, s0, 0x101260 -s_atomic_add s20, s0, 0x101270 -s_atomic_add s20, s0, 0x101280 -s_atomic_add s20, s0, 0x101290 - -s_atomic_add s20, s0, 0x101300 -s_atomic_add s20, s0, 0x101310 -s_atomic_add s20, s0, 0x101320 -s_atomic_add s20, s0, 0x101330 -s_atomic_add s20, s0, 0x101340 -s_atomic_add s20, s0, 0x101350 -s_atomic_add s20, s0, 0x101360 -s_atomic_add s20, s0, 0x101370 -s_atomic_add s20, s0, 0x101380 -s_atomic_add s20, s0, 0x101390 - -s_atomic_add s20, s0, 0x101400 -s_atomic_add s20, s0, 0x101404 -s_atomic_add s20, s0, 0x101408 -s_atomic_add s20, s0, 0x10140c -s_atomic_add s20, s0, 0x101410 -s_atomic_add s20, s0, 0x101414 -s_atomic_add s20, s0, 0x101418 -s_atomic_add s20, s0, 0x10141c -s_atomic_add s20, s0, 0x101420 -s_atomic_add s20, s0, 0x101424 -s_atomic_add s20, s0, 0x101428 -s_atomic_add s20, s0, 0x10142c - -s_atomic_add s20, s0, 0x101500 -s_atomic_add s20, s0, 0x101504 -s_atomic_add s20, s0, 0x101508 -s_atomic_add s20, s0, 0x10150c -s_atomic_add s20, s0, 0x101510 -s_atomic_add s20, s0, 0x101514 -s_atomic_add s20, s0, 0x101518 -s_atomic_add s20, s0, 0x10151c -s_atomic_add s20, s0, 0x101520 -s_atomic_add s20, s0, 0x101524 -s_atomic_add s20, s0, 0x101528 -s_atomic_add s20, s0, 0x10152c - -s_atomic_add s20, s0, 0x101600 -s_atomic_add s20, s0, 0x101604 -s_atomic_add s20, s0, 0x101608 -s_atomic_add s20, s0, 0x10160c -s_atomic_add s20, s0, 0x101610 -s_atomic_add s20, s0, 0x101614 -s_atomic_add s20, s0, 0x101618 -s_atomic_add s20, s0, 0x10161c -s_atomic_add s20, s0, 0x101620 -s_atomic_add s20, s0, 0x101624 -s_atomic_add s20, s0, 0x101628 -s_atomic_add s20, s0, 0x10162c - -s_atomic_add s20, s0, 0x101700 -s_atomic_add s20, s0, 0x101704 -s_atomic_add s20, s0, 0x101708 -s_atomic_add s20, s0, 0x10170c -s_atomic_add s20, s0, 0x101710 -s_atomic_add s20, s0, 0x101714 -s_atomic_add s20, s0, 0x101718 -s_atomic_add s20, s0, 0x10171c -s_atomic_add s20, s0, 0x101720 -s_atomic_add s20, s0, 0x101724 -s_atomic_add s20, s0, 0x101728 -s_atomic_add s20, s0, 0x10172c - -s_atomic_add s20, s0, 0x101800 -s_atomic_add s20, s0, 0x101804 -s_atomic_add s20, s0, 0x101808 -s_atomic_add s20, s0, 0x10180c -s_atomic_add s20, s0, 0x101810 -s_atomic_add s20, s0, 0x101814 -s_atomic_add s20, s0, 0x101818 -s_atomic_add s20, s0, 0x10181c -s_atomic_add s20, s0, 0x101820 -s_atomic_add s20, s0, 0x101824 -s_atomic_add s20, s0, 0x101828 -s_atomic_add s20, s0, 0x10182c - -s_atomic_add s20, s0, 0x101900 -s_atomic_add s20, s0, 0x101904 -s_atomic_add s20, s0, 0x101908 -s_atomic_add s20, s0, 0x10190c -s_atomic_add s20, s0, 0x101910 -s_atomic_add s20, s0, 0x101914 -s_atomic_add s20, s0, 0x101918 -s_atomic_add s20, s0, 0x10191c -s_atomic_add s20, s0, 0x101920 -s_atomic_add s20, s0, 0x101924 -s_atomic_add s20, s0, 0x101928 -s_atomic_add s20, s0, 0x10192c - -s_atomic_add s20, s0, 0x101a00 -s_atomic_add s20, s0, 0x101a04 -s_atomic_add s20, s0, 0x101a08 -s_atomic_add s20, s0, 0x101a0c -s_atomic_add s20, s0, 0x101a10 -s_atomic_add s20, s0, 0x101a14 -s_atomic_add s20, s0, 0x101a18 -s_atomic_add s20, s0, 0x101a1c -s_atomic_add s20, s0, 0x101a20 -s_atomic_add s20, s0, 0x101a24 -s_atomic_add s20, s0, 0x101a28 -s_atomic_add s20, s0, 0x101a2c - -s_atomic_add s20, s0, 0x101b00 -s_atomic_add s20, s0, 0x101b04 -s_atomic_add s20, s0, 0x101b08 -s_atomic_add s20, s0, 0x101b0c -s_atomic_add s20, s0, 0x101b10 -s_atomic_add s20, s0, 0x101b14 -s_atomic_add s20, s0, 0x101b18 -s_atomic_add s20, s0, 0x101b1c -s_atomic_add s20, s0, 0x101b20 -s_atomic_add s20, s0, 0x101b24 -s_atomic_add s20, s0, 0x101b28 -s_atomic_add s20, s0, 0x101b2c - -s_atomic_add s20, s0, 0x101c00 -s_atomic_add s20, s0, 0x101c04 -s_atomic_add s20, s0, 0x101c08 -s_atomic_add s20, s0, 0x101c0c -s_atomic_add s20, s0, 0x101c10 -s_atomic_add s20, s0, 0x101c14 -s_atomic_add s20, s0, 0x101c18 -s_atomic_add s20, s0, 0x101c1c -s_atomic_add s20, s0, 0x101c20 -s_atomic_add s20, s0, 0x101c24 -s_atomic_add s20, s0, 0x101c28 -s_atomic_add s20, s0, 0x101c2c - - -s_sub_u32 s30, s30, 1 -s_cmpk_eq_u32 s30, 0 -s_cbranch_scc0 LOAD_LOOP - - -s_cmpk_ge_i32 s9, 0x2 -s_cbranch_scc1 END - -ATOMIC_LOOP: - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - - -s_sub_u32 s30, s30, 1 -s_cmpk_eq_u32 s30, 0 -s_cbranch_scc0 ATOMIC_LOOP - -//s_waitcnt 0 - -END: -s_waitcnt 0 - -s_mov_b32 s16, 0xa5a50000 -s_store_dword s16, s[0:1], 0x40 glc - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_tcc_return_control.sp3 b/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_tcc_return_control.sp3 deleted file mode 100644 index 99be574f61..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_tcc_return_control.sp3 +++ /dev/null @@ -1,1019 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -//set bit 12 low to select EA0 -s_mov_b32 s32, 0xFFFFEFFF -s_and_b32 s24, s24, s32 - -s_and_b32 s31, s9, 0x1 -s_cmpk_eq_i32 s31, 0x1 -s_cbranch_scc1 ODD_WAVES - -//set bit 12 high to select EA1 -s_mov_b32 s32, 0x1000 -s_or_b32 s24, s24, s32 - -ODD_WAVES: - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -v_mul_i32_i24 v3, v3, 0x10 -v_mul_i32_i24 v9, v9, 0x10 - -s_mov_b32 s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 - -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 - -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 - -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 - -s_barrier - -s_mov_b32 s31, 0xF0000 -s_mov_b32 s32, 0x6000 - -//store and load s8 times -s_mov_b32 s30, s8 - -//waves with the least 2 tgid (i.e. tgid0 and tgid1) will do atomic operations -//while the rest of waves will do load operations -s_cmpk_lt_i32 s9, 0x2 -s_cbranch_scc1 ATOMIC_LOOP - -LOAD_LOOP: - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -s_sub_u32 s30, s30, 1 -s_cmpk_eq_u32 s30, 0 -s_cbranch_scc0 LOAD_LOOP - - -s_cmpk_ge_i32 s9, 0x2 -s_cbranch_scc1 END - -ATOMIC_LOOP: - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -s_sub_u32 s30, s30, 1 -s_cmpk_eq_u32 s30, 0 -s_cbranch_scc0 ATOMIC_LOOP - -//s_waitcnt 0 - -END: -s_waitcnt 0 - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_tcp_atcl1.sp3 b/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_tcp_atcl1.sp3 deleted file mode 100644 index 93d27c8d80..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_tcp_atcl1.sp3 +++ /dev/null @@ -1,80 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_load_dwordx4 s[20:23], s[0:1], 16 // load atc mem surface rsrc -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -var MTYPE_UC = 0x38000000 -s_or_b32 s27, s27, MTYPE_UC - - -//store and load s8 times -s_mov_b32 s8, 33 // store 33 times to overflow atcl1 cache... -s_mov_b32 s30, s8 -s_mov_b32 s31, 0x0 - -STORE_LOOP: -v_add_co_u32 v0, vcc[0:1], v0, 2 -buffer_store_dword v0, v9, s20, s31 idxen:1 glc:1 slc:1 -s_waitcnt 0 -s_add_u32 s31, s31, 4*1024 // step one 4KB page size -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 STORE_LOOP - - - -var DEBUG_FUNCTION = 0 -//remove code to half shader run time -if DEBUG_FUNCTION -s_mov_b32 s8, s30 -s_mov_b32 s31, 0x0 - -LOAD_LOOP: -buffer_load_dword v0, v9, s20, s31 idxen:1 glc:1 slc:1 -s_waitcnt 0 -v_mov_b32 v12, v0 -s_add_u32 s31, s31, 4*1024 -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 LOAD_LOOP -end - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_tcp_utcl1_fifo1.sp3 b/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_tcp_utcl1_fifo1.sp3 deleted file mode 100644 index ac9744f68c..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_tcp_utcl1_fifo1.sp3 +++ /dev/null @@ -1,80 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_load_dwordx4 s[20:23], s[0:1], 16 // load atc mem surface rsrc -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -var MTYPE_UC = 0x38000000 -s_or_b32 s27, s27, MTYPE_UC - - -//store and load s8 times -s_mov_b32 s8, 33 // store 33 times to overflow atcl1 cache... -s_mov_b32 s30, s8 -s_mov_b32 s31, 0x0 - -STORE_LOOP: -v_add_co_u32 v0, vcc[0:1], v0, 2 -buffer_store_dword v0, v9, s20, s31 idxen:1 glc:1 slc:1 -s_waitcnt 0 -s_add_u32 s31, s31, 4*1024 // step one 4KB page size -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 STORE_LOOP - - - -var DEBUG_FUNCTION = 1 -//remove code to half shader run time -if DEBUG_FUNCTION -s_mov_b32 s8, 0x20 -s_mov_b32 s31, 0xffc - -LOAD_LOOP: -buffer_load_dwordx2 v[0:1], v9, s20, s31 idxen:1 glc:1 slc:1 -s_waitcnt 0 -v_mov_b32 v12, v0 -s_add_u32 s31, s31, 4*1024 -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 LOAD_LOOP -end - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_utc_vmwalker_pde_02.sp3 b/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_utc_vmwalker_pde_02.sp3 deleted file mode 100644 index 9c66b37275..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_utc_vmwalker_pde_02.sp3 +++ /dev/null @@ -1,72 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 -//bump up the addresses being accessed to generate multiple reads to the pde memories -v_mul_u32_u24 v9, 65536, v9 -//store and load s8 times -s_mov_b32 s30, s8 -s_mov_b32 s31, 0x0 - -//Hack number of records to avoid range checking which we don't want since we want to generate -//out of range accesses. we are really trying to generate many reads to the PDEs to get FUE. -s_mov_b32 s26, 0xffffffff - -STORE_LOOP: -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_waitcnt 0 -s_add_u32 s31, s31, 0x4 -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 STORE_LOOP - -s_mov_b32 s8, s30 -s_mov_b32 s31, 0x0 - -LOAD_LOOP: -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_waitcnt 0 -v_mov_b32 v12, v0 -s_add_u32 s31, s31, 0x4 -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 LOAD_LOOP - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_utc_vmwalker_pde_03.sp3 b/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_utc_vmwalker_pde_03.sp3 deleted file mode 100644 index d4d53d09a0..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_utc_vmwalker_pde_03.sp3 +++ /dev/null @@ -1,72 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 -//bump up the addresses being accessed to generate multiple reads to the pde memories -v_mul_u32_u24 v9, 4096, v9 -//store and load s8 times -s_mov_b32 s30, s8 -s_mov_b32 s31, 0x0 - -//Hack number of records to avoid range checking which we don't want since we want to generate -//out of range accesses. we are really trying to generate many reads to the PDEs to get FUE. -s_mov_b32 s26, 0xffffffff - -STORE_LOOP: -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_waitcnt 0 -s_add_u32 s31, s31, 0x4 -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 STORE_LOOP - -s_mov_b32 s8, s30 -s_mov_b32 s31, 0x0 - -LOAD_LOOP: -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_waitcnt 0 -v_mov_b32 v12, v0 -s_add_u32 s31, s31, 0x4 -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 LOAD_LOOP - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_vgpr.sp3 b/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_vgpr.sp3 deleted file mode 100644 index 138e13eded..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_vgpr.sp3 +++ /dev/null @@ -1,47 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(8) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset - -tgid_x_en(1) //s_tgid_x s8 -tgid_y_en(1) //s_tgid_y s9 -tgid_z_en(1) //s_tgid_z s10 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -//read mem data -s_mov_b32 s31, 0x0 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:1 -s_waitcnt 0 - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_vgpr_01.sp3 b/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_vgpr_01.sp3 deleted file mode 100644 index 6d3ed3f9db..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_vgpr_01.sp3 +++ /dev/null @@ -1,54 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -//store and load s8 times -s_mov_b32 s30, s8 -s_mov_b32 s31, 0x0 - -STORE_LOOP: -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_waitcnt 0 -s_add_u32 s31, s31, 0x4 -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 STORE_LOOP - -s_endpgm -end diff --git a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_vgpr_02.sp3 b/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_vgpr_02.sp3 deleted file mode 100644 index 3228e2377d..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_vgpr_02.sp3 +++ /dev/null @@ -1,54 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -s_mov_b32 s16, s2 - -//SPI may touch v0,v1,v2 before shader is run - -//store it 10 times -v_mov_b32 v10, v1 -v_mov_b32 v11, v2 -v_mov_b32 v12, v1 -v_mov_b32 v13, v2 -v_mov_b32 v14, v1 -v_mov_b32 v15, v2 -v_mov_b32 v16, v1 -v_mov_b32 v17, v2 -v_mov_b32 v18, v1 -v_mov_b32 v19, v0 - -// read them back -v_mov_b32 v29, v10 -v_mov_b32 v28, v11 -v_mov_b32 v27, v12 -v_mov_b32 v26, v13 -v_mov_b32 v25, v14 -v_mov_b32 v24, v15 -v_mov_b32 v23, v16 -v_mov_b32 v22, v17 -v_mov_b32 v21, v18 -v_mov_b32 v20, v19 - -s_store_dword s16, s[0:1], 0x0 glc - -s_endpgm -end diff --git a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_vgpr_clear.sp3 b/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_vgpr_clear.sp3 deleted file mode 100644 index 190e1cfc24..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_edc_vgpr_clear.sp3 +++ /dev/null @@ -1,75 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(2) // 2 for the buffer resource + 5 for thread/thread group parameters - //s[0:1] the mmeory address for the buffer resource - - tgid_x_en(1) //s_tgid_x s2 - tgid_y_en(1) //s_tgid_y s3 - tgid_z_en(1) //s_tgid_z s4 - - //vo for tid_x - //v1 for tid_y - //v2 for tid_z - - for var vgpr = 0; vgpr < 256; ++vgpr - v_accvgpr_read v[vgpr], acc[vgpr] - end - - for var vgpr = 0; vgpr < 256; ++vgpr - v_accvgpr_write acc[vgpr], v[vgpr] - end - - s_movk_i32 m0, 0x0000 - s_mov_b32 s10, 0x000000f8 - s_set_gpr_idx_on s10, 0x8 -label_0004: - v_mov_b32 v0, 0 - v_mov_b32 v1, 0 - v_mov_b32 v2, 0 - v_mov_b32 v3, 0 - v_mov_b32 v4, 0 - v_mov_b32 v5, 0 - v_mov_b32 v6, 0 - v_mov_b32 v7, 0 - s_sub_u32 s10, s10, 8 - s_set_gpr_idx_idx s10 - s_cbranch_scc0 label_0004 - s_set_gpr_idx_off - v_mbcnt_lo_u32_b32 v1, exec_hi, 0 - v_mbcnt_hi_u32_b32 v1, exec_lo, v1 - v_mul_u32_u24 v1, 8, v1 - s_getreg_b32 s11, hwreg(HW_REG_HW_ID, 4, 2) - s_mulk_i32 s11, 0x4000 - v_add_co_u32 v1, vcc, v1, s11 - s_mov_b32 s10, 7 - s_mov_b32 m0, -1 -label_001B: - ds_write2_b64 v1, v[2:3], v[2:3] offset1:64 - ds_write2_b64 v1, v[4:5], v[4:5] offset0:128 offset1:192 - v_add_co_u32 v1, vcc, 0x00000800, v1 - s_sub_u32 s10, s10, 1 - s_cbranch_scc0 label_001B - - s_getreg_b32 s20, hwreg(HW_REG_HW_ID, 0, 32) - // s12 = SIMD - s_lshr_b32 s12,s20,4 - s_and_b32 s12, s12, 0x3 - // s13 = CU - s_lshr_b32 s13,s20,8 - s_and_b32 s13, s13, 0xf - // s14 = SE - s_lshr_b32 s14,s20,13 - s_and_b32 s14, s14, 0x7 - // s15 = SE * 16 * 4 + CU * 4 + SIMD - s_mul_i32 s16, s14, 64 - s_mul_i32 s17, s13, 4 - s_add_i32 s15, s16, s17 - s_add_i32 s15, s15, s12 - s_mul_i32 s16, s15, 4 - - s_store_dword s15, s[0:1], s16 glc - s_waitcnt 0 - -s_endpgm -end diff --git a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_wf_lifetime_cs_pm4_01_cs.sp3 b/ras_prebuild/sp3/gfx9/edc/arcturus/gc_wf_lifetime_cs_pm4_01_cs.sp3 deleted file mode 100644 index 9069afdb3e..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/arcturus/gc_wf_lifetime_cs_pm4_01_cs.sp3 +++ /dev/null @@ -1,58 +0,0 @@ -//s[0:1]: buffer resource -//s2: num_threads_x_full -//s3: num_threads_x_full * num_threads_y_full -//s4: num_threads_x_full * num_threads_y_full * num_threads_z_full -//s5: COMPUTE_DIM_X -//s6: COMPUTE_DIM_X * COMPUTE_DIM_Y -//s7: loop_lifetime -//s8: dispatch_offset -//s[9:11]: thread group ID -//v[0:2]: thread ID - -shader main - -type(CS) -user_sgpr_count(9) -tgid_x_en(1) -tgid_y_en(1) -tgid_z_en(1) - -//sp3 loop for lifetime -s_mov_b32 s12, 0 //init loop idx s12 -label_0004: -s_cmp_lt_i32 s12, s7 //scc = (s12 < s7) ? 1 : 0 -s_cbranch_scc0 label_0006 //if(scc == 0) then jump to label_0006; else nop - -v_mov_b32 v4,s12 -s_add_i32 s12, s12, 1 //add loop incr -s_branch label_0004 - -label_0006: //end of SP3 loop - -//v3 thread_id_in_group = (tid_z * num_threads_x_full * num_threads_y_full) + (tid_y * num_threads_x_full) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 //v3 = tid_y * num_threads_x_full + tid_x -v_mad_u32_u24 v3, v2, s3, v3 //v3 = tid_z * num_threads_x_ful * num_threads_y_full + v3 - -//s28 thread_group_id = (tgid_z * COMPUTE_DIM_X * COMPUTE_DIM_Y) + (tgid_y * COMPUTE_DIM_X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 //tgid_y * COMPUTE_DIM_X -s_add_i32 s28, s28, s_tgid_x //tgid_y * COMPUTE_DIM_X + tgid_x -s_mul_i32 s29, s6, s_tgid_z //tgid_z * COMPUTE_DIM_X * COMPUTE_DIM_Y -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id * (num_threads_x_full * num_threads_y_full * num_threads_z_full) + thread_id_in_group -v_mov_b32 v9, s28 //thread_group_id -v_mad_u32_u24 v9, v9, s4, v3 - -//fetch the buffer resource -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -//write absolute thread id using it as an index -buffer_store_dword v9, v9, s24, s8 idxen:1 -s_waitcnt 0 - -s_mov_b32 s16, 0xa5a50000 -s_store_dword s16, s[0:1], 0x40 glc - -s_endpgm -end diff --git a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_clear_vgpr_lds_arcturus.bin b/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_clear_vgpr_lds_arcturus.bin deleted file mode 100644 index d0ea4951e1..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_clear_vgpr_lds_arcturus.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_compute.bin b/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_compute.bin deleted file mode 100644 index 46bb0ebc23..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_compute.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_cp_sq.bin b/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_cp_sq.bin deleted file mode 100644 index 3400e4dd29..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_cp_sq.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_cp_sq_gds_read.bin b/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_cp_sq_gds_read.bin deleted file mode 100644 index 1efe1c042e..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_cp_sq_gds_read.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_cs_trap_handler.bin b/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_cs_trap_handler.bin deleted file mode 100644 index 482e614299..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_cs_trap_handler.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_dc_restore.bin b/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_dc_restore.bin deleted file mode 100644 index c5b11be40e..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_dc_restore.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_ea.bin b/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_ea.bin deleted file mode 100644 index 578dc7429c..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_ea.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_ea_02.bin b/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_ea_02.bin deleted file mode 100644 index d0f2aff8e0..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_ea_02.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_ea_dirty.bin b/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_ea_dirty.bin deleted file mode 100644 index 5372f17243..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_ea_dirty.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_gds_02.bin b/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_gds_02.bin deleted file mode 100644 index 451fbb9f53..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_gds_02.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_gds_address1.bin b/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_gds_address1.bin deleted file mode 100644 index ae19466d3f..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_gds_address1.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_gds_oa_phy.bin b/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_gds_oa_phy.bin deleted file mode 100644 index efc7391e00..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_gds_oa_phy.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_lds_02.bin b/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_lds_02.bin deleted file mode 100644 index c6efa4715c..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_lds_02.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_lds_address1.bin b/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_lds_address1.bin deleted file mode 100644 index 2da7b49806..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_lds_address1.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_mc_read.bin b/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_mc_read.bin deleted file mode 100644 index 78335009bd..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_mc_read.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_mc_read_tcc_halt.bin b/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_mc_read_tcc_halt.bin deleted file mode 100644 index 126518f5c7..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_mc_read_tcc_halt.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_no_pollute.bin b/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_no_pollute.bin deleted file mode 100644 index 78335009bd..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_no_pollute.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_sgpr_02.bin b/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_sgpr_02.bin deleted file mode 100644 index f7e785224b..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_sgpr_02.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_sh_atcl1.bin b/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_sh_atcl1.bin deleted file mode 100644 index 9a7f85d0f9..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_sh_atcl1.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_data_bank.bin b/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_data_bank.bin deleted file mode 100644 index 378f968d6f..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_data_bank.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_data_bank_tag.bin b/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_data_bank_tag.bin deleted file mode 100644 index 73f31aa188..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_data_bank_tag.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_dirty_bit.bin b/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_dirty_bit.bin deleted file mode 100644 index b36e1036d6..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_dirty_bit.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_atcl1.bin b/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_atcl1.bin deleted file mode 100644 index 8ccfde0863..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_atcl1.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank.bin b/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank.bin deleted file mode 100644 index 5dbdf029ac..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin b/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin deleted file mode 100644 index 46b24b5909..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_sqc_inst_bank_snop.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_ta_address1.bin b/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_ta_address1.bin deleted file mode 100644 index 78335009bd..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_ta_address1.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin b/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin deleted file mode 100644 index 82c857dd7d..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02_ea.bin b/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02_ea.bin deleted file mode 100644 index 602c0c115f..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_tc_02_ea.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_tc_atomic_02.bin b/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_tc_atomic_02.bin deleted file mode 100644 index 8a69de8835..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_tc_atomic_02.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_tc_atomic_03.bin b/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_tc_atomic_03.bin deleted file mode 100644 index 1318fcb6ff..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_tc_atomic_03.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_tc_atomic_04.bin b/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_tc_atomic_04.bin deleted file mode 100644 index ce1ac3d93a..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_tc_atomic_04.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_tcc_return_control.bin b/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_tcc_return_control.bin deleted file mode 100644 index d8184f9e5d..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_tcc_return_control.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_tcp_atcl1.bin b/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_tcp_atcl1.bin deleted file mode 100644 index d156918db0..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_tcp_atcl1.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_tcp_utcl1_fifo1.bin b/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_tcp_utcl1_fifo1.bin deleted file mode 100644 index b1dddf5fe4..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_tcp_utcl1_fifo1.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_utc_vmwalker_pde_02.bin b/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_utc_vmwalker_pde_02.bin deleted file mode 100644 index 2138d79126..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_utc_vmwalker_pde_02.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_utc_vmwalker_pde_03.bin b/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_utc_vmwalker_pde_03.bin deleted file mode 100644 index c5127c52b7..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_utc_vmwalker_pde_03.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_vgpr.bin b/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_vgpr.bin deleted file mode 100644 index 3be99fde9a..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_vgpr.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_vgpr_01.bin b/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_vgpr_01.bin deleted file mode 100644 index 0d98d2a2a8..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_vgpr_01.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_vgpr_02.bin b/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_vgpr_02.bin deleted file mode 100644 index e54d2c852e..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_vgpr_02.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_vgpr_clear.bin b/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_vgpr_clear.bin deleted file mode 100644 index 326a37c3ab..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_edc_vgpr_clear.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_wf_lifetime_cs_pm4_01_cs.bin b/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_wf_lifetime_cs_pm4_01_cs.bin deleted file mode 100644 index 2605f6ae57..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/arcturus/gc_wf_lifetime_cs_pm4_01_cs.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_clear_vgpr_lds_arcturus.bin b/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_clear_vgpr_lds_arcturus.bin deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_compute.bin b/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_compute.bin deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_cp_sq.bin b/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_cp_sq.bin deleted file mode 100644 index 3400e4dd29..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_cp_sq.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_cp_sq_gds_read.bin b/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_cp_sq_gds_read.bin deleted file mode 100644 index 1efe1c042e..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_cp_sq_gds_read.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_cs_trap_handler.bin b/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_cs_trap_handler.bin deleted file mode 100644 index 482e614299..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_cs_trap_handler.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_dc_restore.bin b/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_dc_restore.bin deleted file mode 100644 index c5b11be40e..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_dc_restore.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_ea.bin b/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_ea.bin deleted file mode 100644 index 578dc7429c..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_ea.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_ea_02.bin b/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_ea_02.bin deleted file mode 100644 index d0f2aff8e0..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_ea_02.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_ea_dirty.bin b/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_ea_dirty.bin deleted file mode 100644 index 5372f17243..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_ea_dirty.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_gds_02.bin b/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_gds_02.bin deleted file mode 100644 index 451fbb9f53..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_gds_02.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_gds_address1.bin b/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_gds_address1.bin deleted file mode 100644 index ae19466d3f..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_gds_address1.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_gds_oa_phy.bin b/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_gds_oa_phy.bin deleted file mode 100644 index efc7391e00..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_gds_oa_phy.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_lds_02.bin b/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_lds_02.bin deleted file mode 100644 index c6efa4715c..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_lds_02.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_lds_address1.bin b/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_lds_address1.bin deleted file mode 100644 index 2da7b49806..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_lds_address1.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_mc_read.bin b/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_mc_read.bin deleted file mode 100644 index 78335009bd..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_mc_read.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_mc_read_tcc_halt.bin b/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_mc_read_tcc_halt.bin deleted file mode 100644 index 126518f5c7..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_mc_read_tcc_halt.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_no_pollute.bin b/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_no_pollute.bin deleted file mode 100644 index 78335009bd..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_no_pollute.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_sgpr_02.bin b/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_sgpr_02.bin deleted file mode 100644 index f7e785224b..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_sgpr_02.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_sh_atcl1.bin b/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_sh_atcl1.bin deleted file mode 100644 index 9a7f85d0f9..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_sh_atcl1.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_sqc_data_bank.bin b/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_sqc_data_bank.bin deleted file mode 100644 index 378f968d6f..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_sqc_data_bank.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_sqc_data_bank_tag.bin b/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_sqc_data_bank_tag.bin deleted file mode 100644 index 73f31aa188..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_sqc_data_bank_tag.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_sqc_dirty_bit.bin b/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_sqc_dirty_bit.bin deleted file mode 100644 index b36e1036d6..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_sqc_dirty_bit.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_atcl1.bin b/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_atcl1.bin deleted file mode 100644 index 8ccfde0863..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_atcl1.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank.bin b/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank.bin deleted file mode 100644 index 5dbdf029ac..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin b/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin deleted file mode 100644 index 46b24b5909..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_sqc_inst_bank_snop.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_ta_address1.bin b/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_ta_address1.bin deleted file mode 100644 index 78335009bd..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_ta_address1.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin b/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin deleted file mode 100644 index 82c857dd7d..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_tc_02.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_tc_02_ea.bin b/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_tc_02_ea.bin deleted file mode 100644 index 602c0c115f..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_tc_02_ea.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_tc_atomic_02.bin b/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_tc_atomic_02.bin deleted file mode 100644 index 8a69de8835..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_tc_atomic_02.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_tc_atomic_03.bin b/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_tc_atomic_03.bin deleted file mode 100644 index 1318fcb6ff..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_tc_atomic_03.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_tc_atomic_04.bin b/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_tc_atomic_04.bin deleted file mode 100644 index ce1ac3d93a..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_tc_atomic_04.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_tcc_return_control.bin b/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_tcc_return_control.bin deleted file mode 100644 index d8184f9e5d..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_tcc_return_control.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_tcp_atcl1.bin b/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_tcp_atcl1.bin deleted file mode 100644 index d156918db0..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_tcp_atcl1.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_tcp_utcl1_fifo1.bin b/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_tcp_utcl1_fifo1.bin deleted file mode 100644 index b1dddf5fe4..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_tcp_utcl1_fifo1.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_utc_vmwalker_pde_02.bin b/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_utc_vmwalker_pde_02.bin deleted file mode 100644 index 2138d79126..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_utc_vmwalker_pde_02.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_utc_vmwalker_pde_03.bin b/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_utc_vmwalker_pde_03.bin deleted file mode 100644 index c5127c52b7..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_utc_vmwalker_pde_03.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_vgpr.bin b/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_vgpr.bin deleted file mode 100644 index 3be99fde9a..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_vgpr.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_vgpr_01.bin b/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_vgpr_01.bin deleted file mode 100644 index 0d98d2a2a8..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_vgpr_01.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_vgpr_02.bin b/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_vgpr_02.bin deleted file mode 100644 index e54d2c852e..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_vgpr_02.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_vgpr_clear.bin b/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_edc_vgpr_clear.bin deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_wf_lifetime_cs_pm4_01_cs.bin b/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_wf_lifetime_cs_pm4_01_cs.bin deleted file mode 100644 index 2605f6ae57..0000000000 Binary files a/ras_prebuild/sp3/gfx9/edc/bin/vega20/gc_wf_lifetime_cs_pm4_01_cs.bin and /dev/null differ diff --git a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_cp_sq.sp3 b/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_cp_sq.sp3 deleted file mode 100644 index e1554afa66..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_cp_sq.sp3 +++ /dev/null @@ -1,59 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(8) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset - -tgid_x_en(1) //s_tgid_x s8 -tgid_y_en(1) //s_tgid_y s9 -tgid_z_en(1) //s_tgid_z s10 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -//read mem data -s_mov_b32 s31, 0x0 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:1 -s_waitcnt 0 - -v_mov_b32 v10, v0 -//buffer_load_dword v10, v9, s24, s31 idxen:1 glc:1 -//s_waitcnt 0 -//v_mov_b32 v11, v1 -s_nop 0x1 -s_nop 0x1 -s_nop 0x1 -s_nop 0x1 - -s_mov_b32 s16, 0xa5a50000 -s_store_dword s16, s[0:1], 0x40 glc - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_cp_sq_gds_read.sp3 b/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_cp_sq_gds_read.sp3 deleted file mode 100644 index a80ea56817..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_cp_sq_gds_read.sp3 +++ /dev/null @@ -1,60 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(8) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset - -tgid_x_en(1) //s_tgid_x s8 -tgid_y_en(1) //s_tgid_y s9 -tgid_z_en(1) //s_tgid_z s10 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - - -//read from the GDS -v_lshlrev_b32 v10, 2, v3 -s_mov_b32 m0, 0xFFFF -s_nop 0x1 -s_nop 0x1 -s_nop 0x1 - -ds_read_b32 v11, v10 gds:1 -s_waitcnt 0 - -v_mov_b32 v12, v11 - -s_nop 0x1 -s_nop 0x1 -s_nop 0x1 -s_nop 0x1 - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_cs_trap_handler.sp3 b/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_cs_trap_handler.sp3 deleted file mode 100644 index 5e4eb34c79..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_cs_trap_handler.sp3 +++ /dev/null @@ -1,673 +0,0 @@ -shader main - -type(CS) - -/*************************************************************************/ -/* control on how to run the shader */ -/*************************************************************************/ -//any hack that needs to be made to run this code in EMU (either becasue various EMU code are not ready or no compute save & restore in EMU run) -var EMU_RUN_HACK = 1 -var EMU_RUN_HACK_RESTORE_NORMAL = 0 -var EMU_RUN_HACK_SAVE_NORMAL_EXIT = 0 -var EMU_RUN_HACK_SAVE_SINGLE_WAVE = 0 -var EMU_RUN_HACK_SAVE_FIRST_TIME = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK -var EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_LO = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK -var EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_HI = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK -var SAVE_LDS = 0 -var WG_BASE_ADDR_LO = 0x9000a000 -var WG_BASE_ADDR_HI = 0x0 -var WAVE_SPACE = 0x5000 //memory size that each wave occupies in workgroup state mem -var CTX_SAVE_CONTROL = 0x0 -var CTX_RESTORE_CONTROL = CTX_SAVE_CONTROL -var SIM_RUN_HACK = 0 //any hack that needs to be made to run this code in SIM (either becasue various RTL code are not ready or no compute save & restore in RTL run) -var SGPR_SAVE_USE_SQC = 0 //use SQC D$ to do the write -var USE_MTBUF_INSTEAD_OF_MUBUF = 0 //need to change BUF_DATA_FORMAT in S_SAVE_BUF_RSRC_WORD3_MISC from 0 to BUF_DATA_FORMAT_32 if set to 1 (i.e. 0x00827FAC) -var SWIZZLE_EN = 0 //whether we use swizzled buffer addressing - -/**************************************************************************/ -/* variables */ -/**************************************************************************/ -var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23 -var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000 - -var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 -var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9 -var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8 -var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 6 -var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT = 24 -var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE = 3 //FIXME sq.blk still has 4 bits at this time while SQ programming guide has 3 bits - -var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400 -var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT = 10 -var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK = 0x100 -var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT = 8 -var SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK = 0x3FF -var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT = 0x0 -var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE = 10 -var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK = 0xFFFFF800 -var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT = 11 -var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE = 21 - -var SQ_WAVE_IB_STS_RCNT_SHIFT = 16 //FIXME -var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT = 15 //FIXME -var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG = 0x00007FFF //FIXME - -var SQ_BUF_RSRC_WORD1_ATC_SHIFT = 24 -var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT = 27 - - -/* Save */ -var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000 //stride is 4 bytes -var S_SAVE_BUF_RSRC_WORD3_MISC = 0x00807FAC //SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14] when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE - -var S_SAVE_SPI_INIT_ATC_MASK = 0x08000000 //bit[27]: ATC bit -var S_SAVE_SPI_INIT_ATC_SHIFT = 27 -var S_SAVE_SPI_INIT_MTYPE_MASK = 0x70000000 //bit[30:28]: Mtype -var S_SAVE_SPI_INIT_MTYPE_SHIFT = 28 -var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG -var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26 - -var S_SAVE_PC_HI_RCNT_SHIFT = 28 //FIXME check with Brian to ensure all fields other than PC[47:0] can be used -var S_SAVE_PC_HI_RCNT_MASK = 0xF0000000 //FIXME -var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 27 //FIXME -var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x08000000 //FIXME - -var s_save_spi_init_lo = exec_lo -var s_save_spi_init_hi = exec_hi - - //tba_lo and tba_hi need to be saved/restored -var tba_lo = ttmp12 -var tba_hi = ttmp13 -var tma_lo = ttmp14 -var tma_hi = ttmp15 - -var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3¡¯h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]} -var s_save_pc_hi = ttmp1 -var s_save_exec_lo = ttmp2 -var s_save_exec_hi = ttmp3 -var s_save_status = ttmp4 -var s_save_trapsts = ttmp5 //not really used until the end of the SAVE routine -var s_save_xnack_mask_lo = ttmp6 -var s_save_xnack_mask_hi = ttmp7 -var s_save_buf_rsrc0 = ttmp8 -var s_save_buf_rsrc1 = ttmp9 -var s_save_buf_rsrc2 = ttmp10 -var s_save_buf_rsrc3 = ttmp11 - -var s_save_mem_offset = tma_lo -var s_save_alloc_size = s_save_trapsts //conflict -var s_save_tmp = s_save_buf_rsrc2 //shared with s_save_buf_rsrc2 (conflict: should not use mem access with s_save_tmp at the same time) -var s_save_m0 = tma_hi - -/* Restore */ -var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE -var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC - -var S_RESTORE_SPI_INIT_ATC_MASK = 0x08000000 //bit[27]: ATC bit -var S_RESTORE_SPI_INIT_ATC_SHIFT = 27 -var S_RESTORE_SPI_INIT_MTYPE_MASK = 0x70000000 //bit[30:28]: Mtype -var S_RESTORE_SPI_INIT_MTYPE_SHIFT = 28 -var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG -var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT = 26 - -var S_RESTORE_PC_HI_RCNT_SHIFT = S_SAVE_PC_HI_RCNT_SHIFT -var S_RESTORE_PC_HI_RCNT_MASK = S_SAVE_PC_HI_RCNT_MASK -var S_RESTORE_PC_HI_FIRST_REPLAY_SHIFT = S_SAVE_PC_HI_FIRST_REPLAY_SHIFT -var S_RESTORE_PC_HI_FIRST_REPLAY_MASK = S_SAVE_PC_HI_FIRST_REPLAY_MASK - -var s_restore_spi_init_lo = exec_lo -var s_restore_spi_init_hi = exec_hi - -var s_restore_mem_offset = ttmp2 -var s_restore_alloc_size = ttmp3 -var s_restore_tmp = ttmp6 //tba_lo/hi need to be restored -var s_restore_mem_offset_save = s_restore_tmp //no conflict - -var s_restore_m0 = s_restore_alloc_size //no conflict - -var s_restore_mode = ttmp7 - -var s_restore_pc_lo = ttmp0 -var s_restore_pc_hi = ttmp1 -var s_restore_exec_lo = tma_lo //no conflict -var s_restore_exec_hi = tma_hi //no conflict -var s_restore_status = ttmp4 -var s_restore_trapsts = ttmp5 -var s_restore_xnack_mask_lo = xnack_mask_lo -var s_restore_xnack_mask_hi = xnack_mask_hi -var s_restore_buf_rsrc0 = ttmp8 -var s_restore_buf_rsrc1 = ttmp9 -var s_restore_buf_rsrc2 = ttmp10 -var s_restore_buf_rsrc3 = ttmp11 - -/**************************************************************************/ -/* trap handler entry points */ -/**************************************************************************/ - if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) //hack to use trap_id for determining save/restore - //FIXME VCCZ un-init assertion s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC - s_and_b32 s_save_tmp, s_save_pc_hi, 0xffff0000 //change SCC - s_cmp_eq_u32 s_save_tmp, 0x007e0000 //Save: trap_id = 0x7e. Restore: trap_id = 0x7f. - s_cbranch_scc0 L_JUMP_TO_RESTORE //do not need to recover STATUS here since we are going to RESTORE - //FIXME s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //need to recover STATUS since we are going to SAVE - s_branch L_SKIP_RESTORE //NOT restore, SAVE actually - else - s_branch L_SKIP_RESTORE //NOT restore. might be a regular trap or save - end - -L_JUMP_TO_RESTORE: - s_branch L_RESTORE //restore - -L_SKIP_RESTORE: - - s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC - s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) - s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK //check whether this is for save - s_cbranch_scc1 L_SAVE //this is the operation for save - //the poential code (such as restore STATUS) on this path is for regular trap handling and don't care for compute save & restore - - //EMU will not execute the code since in hack mode it is skipped while in normal mode there is no save in EMU - //SIM will only execute the code in normal S/R mode but not in hack mode - if (!EMU_RUN_HACK) - L_ERROR: //to catch incorrect savectx setting in SIM assuming the trap handler is only used for save & restore - s_branch L_ERROR - end - -/**************************************************************************/ -/* save routine */ -/**************************************************************************/ - -L_SAVE: - - //check whether there is mem_viol - s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) - s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK - s_cbranch_scc0 L_NO_PC_REWIND - - //if so, need rewind PC assuming GDS operation gets NACKed - s_mov_b32 s_save_tmp, 0 //clear mem_viol bit - s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT, 1), s_save_tmp //clear mem_viol bit - s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32] - s_sub_u32 s_save_pc_lo, s_save_pc_lo, 8 //pc[31:0]-8 - s_subb_u32 s_save_pc_hi, s_save_pc_hi, 0x0 // -scc - -L_NO_PC_REWIND: - s_mov_b32 s_save_tmp, 0 //clear saveCtx bit - s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit - - s_mov_b32 s_save_xnack_mask_lo, xnack_mask_lo //save XNACK_MASK - s_mov_b32 s_save_xnack_mask_hi, xnack_mask_hi - s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_RCNT_SHIFT, SQ_WAVE_IB_STS_RCNT_SIZE) //save RCNT - s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_RCNT_SHIFT - s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp - s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT, SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE) //save FIRST_REPLAY - s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT - s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp - s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS) //clear RCNT and FIRST_REPLAY in IB_STS - s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG - - s_setreg_b32 hwreg(HW_REG_IB_STS), s_save_tmp - - /* inform SPI the readiness and wait for SPI's go signal */ - s_mov_b32 s_save_exec_lo, exec_lo //save EXEC and use EXEC for the go signal from SPI - s_mov_b32 s_save_exec_hi, exec_hi - s_mov_b64 exec, 0x0 //clear EXEC to get ready to receive - if (EMU_RUN_HACK) - - else - s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC - end - - L_SLEEP: - s_sleep 0x2 - - if (EMU_RUN_HACK) - - else - s_cbranch_execz L_SLEEP - end - - - /* setup Resource Contants */ - if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_SINGLE_WAVE)) - //calculate wd_addr using absolute thread id - v_readlane_b32 s_save_tmp, v9, 0 - s_lshr_b32 s_save_tmp, s_save_tmp, 6 - s_mul_i32 s_save_tmp, s_save_tmp, WAVE_SPACE - s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO - s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI - s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL - else - end - if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_SINGLE_WAVE)) - s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO - s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI - s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL - else - end - - - s_mov_b32 s_save_buf_rsrc0, s_save_spi_init_lo //base_addr_lo - s_and_b32 s_save_buf_rsrc1, s_save_spi_init_hi, 0x0000FFFF //base_addr_hi - s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE - s_mov_b32 s_save_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) although not neccessarily inited - s_mov_b32 s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC - s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_ATC_MASK - s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) //get ATC bit into position - s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or ATC - s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_MTYPE_MASK - s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position - s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or MTYPE - - //FIXME right now s_save_m0/s_save_mem_offset use tma_lo/tma_hi (might need to save them before using them?) - s_mov_b32 s_save_m0, m0 //save M0 - - /* global mem offset */ - s_mov_b32 s_save_mem_offset, 0x0 //mem offset initial value = 0 - - - /* the first wave in the threadgroup */ - s_barrier //FIXME not performance-optimal "LDS is used? wait for other waves in the same TG" - s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK //exec is still used here - s_cbranch_scc0 L_SAVE_VGPR - - /* save LDS */ - ////////////////////////////// - L_SAVE_LDS: - - s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on - s_mov_b32 exec_hi, 0xFFFFFFFF - - s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) //lds_size - s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //lds_size is zero? - s_cbranch_scc0 L_SAVE_VGPR //no lds used? jump to L_SAVE_VGPR - s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 6 //LDS size in dwords = lds_size * 64dw - s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //LDS size in bytes - s_mov_b32 s_save_buf_rsrc2, s_save_alloc_size //NUM_RECORDS in bytes - if (SWIZZLE_EN) - s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? - else - s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end - s_mov_b32 m0, 0x0 //lds_offset initial value = 0 - - L_SAVE_LDS_LOOP: - if (SAVE_LDS) - buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 - end - s_add_u32 m0, m0, 256 //every buffer_store_lds does 256 bytes - s_add_u32 s_save_mem_offset, s_save_mem_offset, 256 //mem offset increased by 256 bytes - s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0 - s_cbranch_scc1 L_SAVE_LDS_LOOP //LDS save is complete? - - - /* save VGPRs */ - ////////////////////////////// - L_SAVE_VGPR: - - s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on - s_mov_b32 exec_hi, 0xFFFFFFFF - - s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //vpgr_size - s_add_u32 s_save_alloc_size, s_save_alloc_size, 1 - s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) //FIXME for GFX, zero is possible - s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4) - if (SWIZZLE_EN) - s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? - else - s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end - - s_mov_b32 m0, 0x0 //VGPR initial index value =0 - s_set_gpr_idx_on m0, 0x1 //M0[7:0] = M0[7:0] and M0[15:12] = 0x1 - s_add_u32 s_save_alloc_size, s_save_alloc_size, 0x1000 //add 0x1000 since we compare m0 against it later - - L_SAVE_VGPR_LOOP: - v_mov_b32 v0, v0 //v0 = v[0+m0] - - if(USE_MTBUF_INSTEAD_OF_MUBUF) - tbuffer_store_format_x v0, v0, s_save_buf_rsrc0, s_save_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 - else - buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 - end - - s_add_u32 m0, m0, 1 //next vgpr index - s_add_u32 s_save_mem_offset, s_save_mem_offset, 256 //every buffer_store_dword does 256 bytes - s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 - s_cbranch_scc1 L_SAVE_VGPR_LOOP //VGPR save is complete? - s_set_gpr_idx_off - - /* save SGPRs */ - ////////////////////////////// - s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE) //spgr_size - s_add_u32 s_save_alloc_size, s_save_alloc_size, 1 - s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 4 //Number of SGPRs = (sgpr_size + 1) * 16 (non-zero value) - - if (SGPR_SAVE_USE_SQC) - s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 2 //NUM_RECORDS in bytes - else - s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 8 //NUM_RECORDS in bytes (64 threads) - end - - if (SWIZZLE_EN) - s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? - else - s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end - - s_mov_b32 m0, 0x0 //SGPR initial index value =0 - s_nop 0x0 //Manually inserted wait states - - L_SAVE_SGPR_LOOP: - s_movrels_b32 s0, s0 //s0 = s[0+m0] - write_sgpr_to_mem(s0, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //PV: the best performance should be using s_buffer_store_dwordx4 - s_add_u32 m0, m0, 1 //next sgpr index - s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 - s_cbranch_scc1 L_SAVE_SGPR_LOOP //SGPR save is complete? - - /* save HW registers */ - ////////////////////////////// - L_SAVE_HWREG: - s_mov_b32 s_save_buf_rsrc2, 0x4 //NUM_RECORDS in bytes - if (SWIZZLE_EN) - s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? - else - s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end - - - write_sgpr_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //M0 - - if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_FIRST_TIME)) - s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4 - s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over - s_mov_b32 tba_lo, EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_LO - s_mov_b32 tba_hi, EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_HI - end - - write_sgpr_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //PC - write_sgpr_to_mem(s_save_pc_hi, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) - write_sgpr_to_mem(s_save_exec_lo, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //EXEC - write_sgpr_to_mem(s_save_exec_hi, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) - write_sgpr_to_mem(s_save_status, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //STATUS - - //s_save_trapsts conflicts with s_save_alloc_size - s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) - write_sgpr_to_mem(s_save_trapsts, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //TRAPSTS - - write_sgpr_to_mem(s_save_xnack_mask_lo, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //XNACK_MASK_LO - write_sgpr_to_mem(s_save_xnack_mask_hi, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //XNACK_MASK_HI - - //use s_save_tmp would introduce conflict here between s_save_tmp and s_save_buf_rsrc2 - s_getreg_b32 s_save_m0, hwreg(HW_REG_MODE) //MODE - write_sgpr_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) - - write_sgpr_to_mem(tba_lo, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //TBA_LO - write_sgpr_to_mem(tba_hi, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //TBA_HI - - /* S_PGM_END_SAVED */ //FIXME graphics ONLY - if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_NORMAL_EXIT)) - s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32] - s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4 - s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over - s_rfe_b64 s_save_pc_lo //Return to the main shader program - else - end - - - s_branch L_END_PGM - - - -/**************************************************************************/ -/* restore routine */ -/**************************************************************************/ - -L_RESTORE: - /* Setup Resource Contants */ - if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) - //calculate wd_addr using absolute thread id - v_readlane_b32 s_restore_tmp, v9, 0 - s_lshr_b32 s_restore_tmp, s_restore_tmp, 6 - s_mul_i32 s_restore_tmp, s_restore_tmp, WAVE_SPACE - s_add_i32 s_restore_spi_init_lo, s_restore_tmp, WG_BASE_ADDR_LO - s_mov_b32 s_restore_spi_init_hi, WG_BASE_ADDR_HI - s_and_b32 s_restore_spi_init_hi, s_restore_spi_init_hi, CTX_RESTORE_CONTROL - else - end - - s_mov_b32 s_restore_buf_rsrc0, s_restore_spi_init_lo //base_addr_lo - s_and_b32 s_restore_buf_rsrc1, s_restore_spi_init_hi, 0x0000FFFF //base_addr_hi - s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE - s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) - s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC - s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_ATC_MASK - s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) //get ATC bit into position - s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or ATC - s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_MTYPE_MASK - s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position - s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or MTYPE - - /* global mem offset */ - s_mov_b32 s_restore_mem_offset, 0x0 //mem offset initial value = 0 - - /* the first wave in the threadgroup */ - s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK - s_cbranch_scc0 L_RESTORE_VGPR - - /* restore LDS */ - ////////////////////////////// - L_RESTORE_LDS: - - s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on //be consistent with SAVE although can be moved ahead - s_mov_b32 exec_hi, 0xFFFFFFFF - - s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) //lds_size - s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //lds_size is zero? - s_cbranch_scc0 L_RESTORE_VGPR //no lds used? jump to L_RESTORE_VGPR - s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 6 //LDS size in dwords = lds_size * 64dw - s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //LDS size in bytes - s_mov_b32 s_restore_buf_rsrc2, s_restore_alloc_size //NUM_RECORDS in bytes - if (SWIZZLE_EN) - s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? - else - s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end - s_mov_b32 m0, 0x0 //lds_offset initial value = 0 - - L_RESTORE_LDS_LOOP: - if (SAVE_LDS) - buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 - end - s_add_u32 m0, m0, 256 //every buffer_load_dword does 256 bytes - s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256 //mem offset increased by 256 bytes - s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0 - s_cbranch_scc1 L_RESTORE_LDS_LOOP //LDS restore is complete? - - - /* restore VGPRs */ - ////////////////////////////// - L_RESTORE_VGPR: - - s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on //be consistent with SAVE although can be moved ahead - s_mov_b32 exec_hi, 0xFFFFFFFF - - s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //vpgr_size - s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1 - s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) - s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4) - if (SWIZZLE_EN) - s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? - else - s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end - s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v1, v0 will be the last - s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256 - s_mov_b32 m0, 1 //VGPR initial index value = 1 - s_set_gpr_idx_on m0, 0x8 //M0[7:0] = M0[7:0] and M0[15:12] = 0x8 - s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 0x8000 //add 0x8000 since we compare m0 against it later - - L_RESTORE_VGPR_LOOP: - if(USE_MTBUF_INSTEAD_OF_MUBUF) - tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 - else - buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 - end - s_waitcnt vmcnt(0) //ensure data ready - v_mov_b32 v0, v0 //v[0+m0] = v0 - s_add_u32 m0, m0, 1 //next vgpr index - s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256 //every buffer_load_dword does 256 bytes - s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0 - s_cbranch_scc1 L_RESTORE_VGPR_LOOP //VGPR restore (except v0) is complete? - s_set_gpr_idx_off - /* VGPR restore on v0 */ - if(USE_MTBUF_INSTEAD_OF_MUBUF) - tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 - else - buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 - end - - - /* restore SGPRs */ - ////////////////////////////// - s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE) //spgr_size - s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1 - s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 4 //Number of SGPRs = (sgpr_size + 1) * 16 (non-zero value) - - if (SGPR_SAVE_USE_SQC) - s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 2 //NUM_RECORDS in bytes - else - s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 8 //NUM_RECORDS in bytes (64 threads) - end - if (SWIZZLE_EN) - s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? - else - s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end - read_sgpr_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //save s0 to s_restore_tmp - s_mov_b32 m0, 0x1 //SGPR initial index value =1 //go on with with s1 - - L_RESTORE_SGPR_LOOP: - read_sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //PV: further performance improvement can be made - s_waitcnt lgkmcnt(0) //ensure data ready - s_movreld_b32 s0, s0 //s[0+m0] = s0 - s_add_u32 m0, m0, 1 //next sgpr index - s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0 - s_cbranch_scc1 L_RESTORE_SGPR_LOOP //SGPR restore (except s0) is complete? - s_mov_b32 s0, s_restore_tmp /* SGPR restore on s0 */ - - /* restore HW registers */ - ////////////////////////////// - L_RESTORE_HWREG: - s_mov_b32 s_restore_buf_rsrc2, 0x4 //NUM_RECORDS in bytes - if (SWIZZLE_EN) - s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? - else - s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end - - read_sgpr_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //M0 - read_sgpr_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //PC - read_sgpr_from_mem(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) - read_sgpr_from_mem(s_restore_exec_lo, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //EXEC - read_sgpr_from_mem(s_restore_exec_hi, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) - read_sgpr_from_mem(s_restore_status, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //STATUS - read_sgpr_from_mem(s_restore_trapsts, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //TRAPSTS - read_sgpr_from_mem(xnack_mask_lo, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //XNACK_MASK_LO - read_sgpr_from_mem(xnack_mask_hi, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //XNACK_MASK_HI - read_sgpr_from_mem(s_restore_mode, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //MODE - read_sgpr_from_mem(tba_lo, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //TBA_LO - read_sgpr_from_mem(tba_hi, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //TBA_HI - - s_waitcnt lgkmcnt(0) //from now on, it is safe to restore STATUS and IB_STS - - s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS - - //for normal save & restore, the saved PC points to the next inst to execute, no adjustment needs to be made, otherwise: - if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) - s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 8 //pc[31:0]+8 //two back-to-back s_trap are used (first for save and second for restore) - s_addc_u32 s_restore_pc_hi, s_restore_pc_hi, 0x0 //carry bit over - end - if ((EMU_RUN_HACK) && (EMU_RUN_HACK_RESTORE_NORMAL)) - s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 4 //pc[31:0]+4 // save is hack through s_trap but restore is normal - s_addc_u32 s_restore_pc_hi, s_restore_pc_hi, 0x0 //carry bit over - end - - s_mov_b32 m0, s_restore_m0 - s_mov_b32 exec_lo, s_restore_exec_lo - s_mov_b32 exec_hi, s_restore_exec_hi - - s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK, s_restore_trapsts - s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE), s_restore_m0 - s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK, s_restore_trapsts - s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT - s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE), s_restore_m0 - //s_setreg_b32 hwreg(HW_REG_TRAPSTS), s_restore_trapsts //don't overwrite SAVECTX bit as it may be set through external SAVECTX during restore - s_setreg_b32 hwreg(HW_REG_MODE), s_restore_mode - //reuse s_restore_m0 as a temp register - s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_RCNT_MASK - s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_RCNT_SHIFT - s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_RCNT_SHIFT - s_mov_b32 s_restore_tmp, 0x0 //IB_STS is zero - s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0 - s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_FIRST_REPLAY_MASK - s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT - s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT - s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0 - s_and_b32 s_restore_m0, s_restore_status, SQ_WAVE_STATUS_INST_ATC_MASK - s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT - s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_tmp - s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status - - s_barrier //barrier to ensure the readiness of LDS before access attemps from any other wave in the same TG //FIXME not performance-optimal at this time - - -// s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution - s_rfe_restore_b64 s_restore_pc_lo, s_restore_m0 // s_restore_m0[0] is used to set STATUS.inst_atc - - -/**************************************************************************/ -/* the END */ -/**************************************************************************/ -L_END_PGM: - s_endpgm - -end - - -/**************************************************************************/ -/* the helper functions */ -/**************************************************************************/ - -function write_sgpr_to_mem(s, s_rsrc, s_mem_offset, use_sqc, use_mtbuf) - if (use_sqc) - s_mov_b32 exec_lo, m0 //assuming exec_lo is not needed anymore from this point on - s_mov_b32 m0, s_mem_offset - s_buffer_store_dword s, s_rsrc, m0 glc:1 - s_add_u32 s_mem_offset, s_mem_offset, 4 - s_mov_b32 m0, exec_lo - elsif (use_mtbuf) - v_mov_b32 v0, s - tbuffer_store_format_x v0, v0, s_rsrc, s_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 - s_add_u32 s_mem_offset, s_mem_offset, 256 - else - v_mov_b32 v0, s - buffer_store_dword v0, v0, s_rsrc, s_mem_offset slc:1 glc:1 - s_add_u32 s_mem_offset, s_mem_offset, 256 - end -end - - - -function read_sgpr_from_mem(s, s_rsrc, s_mem_offset, use_sqc) - s_buffer_load_dword s, s_rsrc, s_mem_offset glc:1 - if (use_sqc) - s_add_u32 s_mem_offset, s_mem_offset, 4 - else - s_add_u32 s_mem_offset, s_mem_offset, 256 - end -end - - - - - diff --git a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_dc_restore.sp3 b/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_dc_restore.sp3 deleted file mode 100644 index 21130cd511..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_dc_restore.sp3 +++ /dev/null @@ -1,21 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(4) - v_mov_b32 v0, s0 - v_mov_b32 v1, s1 - v_mov_b32 v2, s2 - v_mov_b32 v3, s3 - flat_load_dword v4, v[0:1] slc - s_waitcnt vmcnt(0)&lgkmcnt(0) - v_mov_b32 v5, 0 - s_sleep 40000 -LOOP: - v_add_co_u32 v5, vcc, 1, v5 - s_waitcnt vmcnt(0)&lgkmcnt(0) - v_cmp_lt_u32 vcc, v5, v4 - s_cbranch_vccnz LOOP - flat_store_dword v[2,3], v5 - s_waitcnt vmcnt(0)&lgkmcnt(0) -s_endpgm -end diff --git a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_ea.sp3 b/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_ea.sp3 deleted file mode 100644 index 99147cd517..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_ea.sp3 +++ /dev/null @@ -1,69 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -//store and load s8 times -s_mov_b32 s30, s8 -s_mov_b32 s31, 0x80 - -STORE_LOOP: -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x100 -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x100 -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 STORE_LOOP - -s_mov_b32 s8, s30 -s_mov_b32 s31, 0x80 - -s_waitcnt 0 -LOAD_LOOP: -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x100 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x100 -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 LOAD_LOOP - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_ea_02.sp3 b/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_ea_02.sp3 deleted file mode 100644 index 0b8e13bdb2..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_ea_02.sp3 +++ /dev/null @@ -1,131 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -//store and load s8 times -s_mov_b32 s30, s8 -s_mov_b32 s31, 0x0 - -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 - -s_waitcnt 0 - -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_add_u32 s31, s31, 0x4 - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_ea_dirty.sp3 b/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_ea_dirty.sp3 deleted file mode 100644 index a3ba2787de..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_ea_dirty.sp3 +++ /dev/null @@ -1,61 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -s_mov_b32 s32, 0x4000 -v_mul_i32_i24 v9, v9, s32 - -//store and load s8 times -s_mov_b32 s30, s8 -s_mov_b32 s31, 0x10000 - -STORE_LOOP: -buffer_store_dword v0, v9, s24, s31 offen:1 -s_waitcnt 0 -s_add_u32 s31, s31, 0x10000 -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 STORE_LOOP - -s_mov_b32 s16, 0xa5a50000 -s_store_dword s16, s[0:1], 0x40 glc - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_gds_02.sp3 b/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_gds_02.sp3 deleted file mode 100644 index 55f5ceec97..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_gds_02.sp3 +++ /dev/null @@ -1,79 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -//read mem data -s_mov_b32 s31, 0x0 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:1 -s_waitcnt 0 - -//write it to GDS -s_mov_b32 s30, s8 -v_lshlrev_b32 v10, 2, v9 -s_mov_b32 m0, 0xFFFF -s_nop 0x1 -s_nop 0x1 -s_nop 0x1 - -STORE_LOOP: -ds_write_b32 v10, v0 gds:1 // GPU hang when GPU access the GDS with GFX queue -s_waitcnt 0 -v_add_u16 v10, v10, 0x10 -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 STORE_LOOP - -s_mov_b32 s8, s30 -v_lshlrev_b32 v10, 2, v3 - -LOAD_LOOP: -ds_read_b32 v11, v10 gds:1 -s_waitcnt 0 -v_mov_b32 v12, v11 -v_add_u16 v10, v10, 0x10 -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 LOAD_LOOP - -s_mov_b32 s16, 0xa5a50000 -s_store_dword s16, s[0:1], 0x40 glc - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_gds_address1.sp3 b/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_gds_address1.sp3 deleted file mode 100644 index 1df833258a..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_gds_address1.sp3 +++ /dev/null @@ -1,55 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(8) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset - -tgid_x_en(1) //s_tgid_x s8 -tgid_y_en(1) //s_tgid_y s9 -tgid_z_en(1) //s_tgid_z s10 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -//read data from GDS -v_lshlrev_b32 v10, 2, v3 -s_mov_b32 m0, 0xFFFF -s_nop 1 -s_nop 1 -s_nop 1 -ds_read_b32 v11, v10 gds:1 -s_waitcnt 0 - -//write the data to memory -buffer_store_dword v11, v9, s24, s7 idxen:1 glc:1 -s_waitcnt 0 - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_gds_oa_phy.sp3 b/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_gds_oa_phy.sp3 deleted file mode 100644 index 93509f41cf..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_gds_oa_phy.sp3 +++ /dev/null @@ -1,68 +0,0 @@ -shader main - type(CS) - - - user_sgpr_count(4) - tgid_x_en(1) - tgid_y_en(1) - tgid_z_en(1) - - s_getreg_b32 s18, hwreg(HW_REG_HW_ID, 0, 32) - s_bfe_u32 s16, s18, 0x2001e // get meid - s_bfe_u32 s17, s18, 0x20006 // get pipeid - //s_add_u32 s17, s17, s16 - - // get ring id - v_mov_b32 v20, s17 - s_and_b32 s17, s17, 0x7 - - // Get thread_id inside wave - v_mbcnt_lo_u32_b32 v8, 0xffffffff, 0 - v_mbcnt_hi_u32_b32 v9, 0xffffffff, v8 - - s_waitcnt 0 - - // init: gds write address - v_mov_b32 v13, 0 - - // the first 128DW is for ordered-append counter - v_mov_b32 v14, 0x80 - - // offset ring - v_mov_b32 v15, 0x200 - - v_mul_lo_u32 v15, v15, v20 // ring offset - v_mov_b32 v16, 0x40 // wave_size - - v_mul_lo_u32 v18, v1, s1 - v_add_co_u32 v18, vcc, v18, v0 - v_lshrrev_b32 v17,6 ,v18 - s_mov_b32 s9, s12 - s_lshr_b32 s9, s9, 6 - s_and_b32 s9, s9, 0x7ff - s_lshl_b32 s17, s17, 18 - s_or_b32 s9, s9, s17 - s_mov_b32 m0, s9 - - v_mov_b32 v10, 1 - v_mov_b32 v11, 0 - ds_ordered_count v11, v10 gds:1 offset0:0 offset1:1 - s_waitcnt 0 - - v_mov_b32 v18, v11 - - v_mul_lo_u32 v16, v16, v18 // waves offset before. - v_add_co_u32 v13, vcc, v13, v14 - v_add_co_u32 v13, vcc, v13, v15 - v_add_co_u32 v13, vcc, v13, v16 - v_add_co_u32 v13, vcc, v13, v9 - - v_lshlrev_b32 v13,2,v13 - s_mov_b32 m0, 0x4000 - s_nop 0 - ds_write_b32 v13, v0 gds:1 - s_waitcnt 0 - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_lds_02.sp3 b/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_lds_02.sp3 deleted file mode 100644 index 780e9f9af7..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_lds_02.sp3 +++ /dev/null @@ -1,79 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -//read mem data -s_mov_b32 s31, 0x0 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:1 -s_waitcnt 0 - -//store and load s8 times -s_mov_b32 s30, s8 -v_lshlrev_b32 v10, 2, v3 -s_mov_b32 m0, 0xFFFF -s_nop 0x1 -s_nop 0x1 -s_nop 0x1 - -STORE_LOOP: -ds_write_b32 v10, v0 -s_waitcnt 0 -v_add_u16 v10, v10, 0x10 -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 STORE_LOOP - -s_mov_b32 s8, s30 -v_lshlrev_b32 v10, 2, v3 - -LOAD_LOOP: -ds_read_b32 v11, v10 -s_waitcnt 0 -v_mov_b32 v12, v11 -v_add_u16 v10, v10, 0x10 -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 LOAD_LOOP - -s_mov_b32 s16, 0xa5a50000 -s_store_dword s16, s[0:1], 0x40 glc - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_lds_address1.sp3 b/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_lds_address1.sp3 deleted file mode 100644 index 09618ba90f..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_lds_address1.sp3 +++ /dev/null @@ -1,55 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(8) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset - -tgid_x_en(1) //s_tgid_x s8 -tgid_y_en(1) //s_tgid_y s9 -tgid_z_en(1) //s_tgid_z s10 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -//read it from LDS -v_lshlrev_b32 v10, 2, v3 -s_mov_b32 m0, 0xFFFF -s_nop 1 -s_nop 1 -s_nop 1 -ds_read_b32 v0, v10 -s_waitcnt 0 - -//write the data to memory -buffer_store_dword v0, v9, s24, s7 idxen:1 glc:1 -s_waitcnt 0 - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_mc_read.sp3 b/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_mc_read.sp3 deleted file mode 100644 index 4c9f7ec36a..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_mc_read.sp3 +++ /dev/null @@ -1,52 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(8) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset - -tgid_x_en(1) //s_tgid_x s8 -tgid_y_en(1) //s_tgid_y s9 -tgid_z_en(1) //s_tgid_z s10 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -//read mem data -s_mov_b32 s31, 0x0 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:1 -s_waitcnt 0 - - -//export poisoned data to L2 -buffer_store_dword v0, v9, s24, s7 idxen:1 glc:1 -s_waitcnt 0 - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_mc_read_tcc_halt.sp3 b/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_mc_read_tcc_halt.sp3 deleted file mode 100644 index e13d811bd4..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_mc_read_tcc_halt.sp3 +++ /dev/null @@ -1,77 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(8) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset - -tgid_x_en(1) //s_tgid_x s8 -tgid_y_en(1) //s_tgid_y s9 -tgid_z_en(1) //s_tgid_z s10 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -//read mem data -s_mov_b32 s31, 0x0 -//For vega20, we need to set bit 12 low. This bit will just be set low here in the shader. -//s_mov_b32 s24, 0x15c000 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:1 -s_waitcnt 0 - -//store it 10 times -v_mov_b32 v10, v0 -v_mov_b32 v11, v0 -v_mov_b32 v12, v0 -v_mov_b32 v13, v0 -v_mov_b32 v14, v0 -v_mov_b32 v15, v0 -v_mov_b32 v16, v0 -v_mov_b32 v17, v0 -v_mov_b32 v18, v0 -v_mov_b32 v19, v0 - -// read them back -v_mov_b32 v29, v10 -v_mov_b32 v28, v11 -v_mov_b32 v27, v12 -v_mov_b32 v26, v13 -v_mov_b32 v25, v14 -v_mov_b32 v24, v15 -v_mov_b32 v23, v16 -v_mov_b32 v22, v17 -v_mov_b32 v21, v18 -v_mov_b32 v20, v19 - -//export poisoned data to L2 -buffer_store_dword v0, v9, s24, s7 idxen:1 glc:1 -s_waitcnt 0 - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_no_pollute.sp3 b/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_no_pollute.sp3 deleted file mode 100644 index e1c141b7bd..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_no_pollute.sp3 +++ /dev/null @@ -1,51 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(8) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset - -tgid_x_en(1) //s_tgid_x s8 -tgid_y_en(1) //s_tgid_y s9 -tgid_z_en(1) //s_tgid_z s10 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -//read mem data -s_mov_b32 s31, 0x0 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:1 -s_waitcnt 0 - -//export poisoned data to L2 -buffer_store_dword v0, v9, s24, s7 idxen:1 glc:1 -s_waitcnt 0 - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_sgpr_02.sp3 b/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_sgpr_02.sp3 deleted file mode 100644 index 76b575d084..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_sgpr_02.sp3 +++ /dev/null @@ -1,55 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//SPI may touch s0...sn before shader is run - -s_mov_b32 s16, s2 - -//write data -s_mov_b32 s30, s0 -s_mov_b32 s31, s1 -s_mov_b32 s32, s2 -s_mov_b32 s33, s3 -s_mov_b32 s34, s4 -s_mov_b32 s35, s5 -s_mov_b32 s36, s6 -s_mov_b32 s37, s7 -s_mov_b32 s38, s8 -s_mov_b32 s39, s9 - -//read back -s_mov_b32 s0, s30 -s_mov_b32 s1, s31 -s_mov_b32 s2, s32 -s_mov_b32 s3, s33 -s_mov_b32 s4, s34 -s_mov_b32 s5, s35 -s_mov_b32 s6, s36 -s_mov_b32 s7, s37 -s_mov_b32 s8, s38 -s_mov_b32 s9, s39 - -s_store_dword s16, s[0:1], 0x0 glc - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_sh_atcl1.sp3 b/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_sh_atcl1.sp3 deleted file mode 100644 index 4058f5b7dc..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_sh_atcl1.sp3 +++ /dev/null @@ -1,75 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_load_dwordx4 s[20:23], s[0:1], 16 // load atc mem surface rsrc -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - - -var MTYPE_UC = 0x38000000 -s_or_b32 s27, s27, MTYPE_UC - -s_mov_b32 s30, s8 -s_mov_b32 m0, 0x0 - - -STORE_LOOP: -s_buffer_store_dword s8, s[20:23], m0 glc:1 -s_waitcnt 0 -s_add_u32 m0, m0, 4*1024 // step one 4KB page table address -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 STORE_LOOP - -var DEBUG_FUNCTION = 0 -// Remove function check code to half shader run time... -if DEBUG_FUNCTION -s_mov_b32 s8, s30 -s_mov_b32 m0, 0x0 - -LOAD_LOOP: -s_buffer_load_dword s0, s[20:23], m0 glc:1 -s_waitcnt 0 -s_add_u32 m0, m0, 4*1024 -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 LOAD_LOOP -end - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_sqc_data_bank.sp3 b/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_sqc_data_bank.sp3 deleted file mode 100644 index fe1b4c2a60..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_sqc_data_bank.sp3 +++ /dev/null @@ -1,96 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -/* -s_bfe_u32 s33, s8, 0x20004 // extract bank select bits -s_lshl_b32 s33, s33, 6 // ((bank_sel & 0x3) << 6) , bank_sel = address[9:8] ^ address[7:6], if 4 bank enabled -s_and_b32 s8, s8, 0xf -*/ - -//store and load s8 times -s_mov_b32 s30, s8 -s_mov_b32 s31, 0x0 - - -s_or_b32 s26, s26, 0x1000 //hack the buffer size to enough - -STORE_LOOP: - -var TOUCH_4_BANKS=1 -if TOUCH_4_BANKS -s_mov_b32 m0, 0x0 // BANKA -s_buffer_store_dword s8, s[24:27], m0 glc:1 -s_waitcnt 0 -s_buffer_load_dword s32, s[24:27], m0 glc:1 -s_waitcnt 0 - - -s_mov_b32 m0, 0x40 // BANKB -s_buffer_store_dword s8, s[24:27], m0 glc:1 -s_waitcnt 0 -s_buffer_load_dword s32, s[24:27], m0 glc:1 -s_waitcnt 0 - -/* -s_mov_b32 m0, 0x80 // BANKC -s_buffer_store_dword s8, s[24:27], m0 glc:1 -s_waitcnt 0 -s_buffer_load_dword s32, s[24:27], m0 glc:1 -s_waitcnt 0 - - -s_mov_b32 m0, 0xC0 // BANKD -s_buffer_store_dword s8, s[24:27], m0 glc:1 -s_waitcnt 0 -s_buffer_load_dword s32, s[24:27], m0 glc:1 -s_waitcnt 0 -*/ - -end - -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 STORE_LOOP - -s_dcache_wb // to make emu, sim img match... - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_sqc_data_bank_tag.sp3 b/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_sqc_data_bank_tag.sp3 deleted file mode 100644 index f7df170170..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_sqc_data_bank_tag.sp3 +++ /dev/null @@ -1,96 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -/* -s_bfe_u32 s33, s8, 0x20004 // extract bank select bits -s_lshl_b32 s33, s33, 6 // ((bank_sel & 0x3) << 6) , bank_sel = address[9:8] ^ address[7:6], if 4 bank enabled -s_and_b32 s8, s8, 0xf -*/ - -//store and load s8 times -s_mov_b32 s30, s8 -s_mov_b32 s31, 0x0 - - -s_or_b32 s26, s26, 0x1000 //hack the buffer size to enough - -STORE_LOOP: - -var TOUCH_4_BANKS=1 -if TOUCH_4_BANKS -s_mov_b32 m0, 0x0 // BANKA -s_buffer_store_dword s8, s[24:27], m0 glc:0 -s_waitcnt 0 -s_buffer_load_dword s32, s[24:27], m0 glc:0 -s_waitcnt 0 - - -s_mov_b32 m0, 0x40 // BANKB -s_buffer_store_dword s8, s[24:27], m0 glc:0 -s_waitcnt 0 -s_buffer_load_dword s32, s[24:27], m0 glc:0 -s_waitcnt 0 - -/* -s_mov_b32 m0, 0x80 // BANKC -s_buffer_store_dword s8, s[24:27], m0 glc:1 -s_waitcnt 0 -s_buffer_load_dword s32, s[24:27], m0 glc:1 -s_waitcnt 0 - - -s_mov_b32 m0, 0xC0 // BANKD -s_buffer_store_dword s8, s[24:27], m0 glc:1 -s_waitcnt 0 -s_buffer_load_dword s32, s[24:27], m0 glc:1 -s_waitcnt 0 -*/ - -end - -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 STORE_LOOP - -s_dcache_wb // to make emu, sim img match... - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_sqc_dirty_bit.sp3 b/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_sqc_dirty_bit.sp3 deleted file mode 100644 index dd766baed3..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_sqc_dirty_bit.sp3 +++ /dev/null @@ -1,112 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -s_or_b32 s27, s27, 0x8000000 // changing mtype to non volatile -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - - -//store and load s8 times -s_mov_b32 s30, s8 -s_mov_b32 s31, 0x0 - -s_mov_b32 s9, 0xaa -s_mov_b32 s10, 0xbb -s_mov_b32 s11, 0xcc - -// BUFFER STORE OFFSETS FOR BANK A AND BANKB -s_mov_b32 s12, 0x0 -s_mov_b32 s13, 0x10 -s_mov_b32 s14, 0x40 -s_mov_b32 s15, 0x50 - - -// The following sequence is needed to inject error in dirty bit ram. Sequence was provided by SQC designer 4/1/2015 -//1. you have an invalid line in data cache, -//2. you write to some of the dwords in that line (the remaining dwords are still invalid), -//3. then there is a read request that hit on that line, but it needs the dwords that are not yet there in that line -//(in other words, it needs some of the invalid dwords of that line), -//4. the request will go to TC, -//5. when TC return comes back, the dirty bit rm will be read - -STORE_LOOP: - -var TOUCH_4_BANKS=1 -if TOUCH_4_BANKS - -s_mov_b32 m0, s13 // BANKA write one dword to tc -s_buffer_store_dwordx2 s[8:9], s[24:27], m0 glc:1 -s_waitcnt 0 - - -s_mov_b32 m0, s12 // BANKA. write one dword to sqc -s_buffer_store_dwordx2 s[10:11], s[24:27], m0 glc:0 -s_waitcnt 0 - -s_mov_b32 m0, s13 // BANK A read the dword that is not in cache -s_buffer_load_dword s32, s[24:27], m0 glc:0 -s_waitcnt 0 - -s_mov_b32 m0, s15 // BANKB write one dword to tc -s_buffer_store_dwordx2 s[8:9], s[24:27], m0 glc:1 -s_waitcnt 0 - -s_mov_b32 m0, s14 // BANKB write one dword to sqc -s_buffer_store_dwordx2 s[10:11], s[24:27], m0 glc:0 -s_waitcnt 0 - -s_mov_b32 m0, s15 // BANK B read the dword that is not in cache -s_buffer_load_dword s32, s[24:27], m0 glc:0 -s_waitcnt 0 -end - -s_add_u32 s12, s12,0x80 -s_add_u32 s13, s13,0x80 -s_add_u32 s14, s14,0x80 -s_add_u32 s15, s15,0x80 -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 STORE_LOOP - -s_dcache_wb // to make emu, sim img match... - -s_mov_b32 s16, 0xa5a50000 -s_store_dword s16, s[0:1], 0x40 glc - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_sqc_inst_atcl1.sp3 b/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_sqc_inst_atcl1.sp3 deleted file mode 100644 index 3d3186d362..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_sqc_inst_atcl1.sp3 +++ /dev/null @@ -1,63 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_load_dwordx4 s[20:23], s[0:1], 16 // load atc mem surface rsrc -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - - -var MTYPE_UC = 0x38000000 -s_or_b32 s27, s27, MTYPE_UC - -label inst_page[34+1] // 34 4k pages - -for var i =0; i < 34; i++ -inst_page[i]: - //each block are 4k side... - s_cbranch_execnz inst_page[i+1] //1 dword - for var j = 0; j < (4*1024)/4 -1; j++ - v_mov_b32 v0, 0 // each with 1 dword - end - -end -inst_page[34]: - - - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_sqc_inst_bank.sp3 b/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_sqc_inst_bank.sp3 deleted file mode 100644 index 64e1084623..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_sqc_inst_bank.sp3 +++ /dev/null @@ -1,69 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -// don't care about the loop count, fix 8 loops -// Totaly number of cacheline equals 2(A,B,)*8 - -var num_cache_lines = 16 -label BLOCK_64B[num_cache_lines] - - -for var loop = 0; loop < num_cache_lines - 1; loop++ -BLOCK_64B[loop]: - s_branch BLOCK_64B[loop+1] // 1DW - for var i = 0; i < 15; i++ - v_nop - end - -end - -// last block -for var i = 0; i < 15; i++ - v_nop -end -//For uei 2 msb and lsb flipped -// s_nop will become v_nop and it will a legal instruction -BLOCK_64B[num_cache_lines-1]: - for var i = 0; i < 81; i++ - s_nop 0x1 - end -s_endpgm -end - -/** comment, four bank interleave -Addr 0x90000000 => Bank A -Addr 0x90000040 => Bank B -Addr 0x90000080 => Bank C -Addr 0x900000c0 => Bank D -Addr 0x90000100 => Bank B -Addr 0x90000140 => Bank A -Addr 0x90000180 => Bank D -Addr 0x900001c0 => Bank C -Addr 0x90000200 => Bank C -Addr 0x90000240 => Bank D -Addr 0x90000280 => Bank A -Addr 0x900002c0 => Bank B -Addr 0x90000300 => Bank D -Addr 0x90000340 => Bank C -Addr 0x90000380 => Bank B - -**/ diff --git a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_sqc_inst_bank_snop.sp3 b/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_sqc_inst_bank_snop.sp3 deleted file mode 100644 index ba62535613..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_sqc_inst_bank_snop.sp3 +++ /dev/null @@ -1,29 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - - for var i = 0; i < 1000; i++ - s_nop 0x1 - end - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_ta_address1.sp3 b/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_ta_address1.sp3 deleted file mode 100644 index 04e7a3a285..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_ta_address1.sp3 +++ /dev/null @@ -1,51 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(8) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset - -tgid_x_en(1) //s_tgid_x s8 -tgid_y_en(1) //s_tgid_y s9 -tgid_z_en(1) //s_tgid_z s10 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -//read from memory -s_mov_b32 s31, 0x0 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:1 -s_waitcnt 0 - -//write the data to memory -buffer_store_dword v0, v9, s24, s7 idxen:1 glc:1 -s_waitcnt 0 - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_tc_02.sp3 b/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_tc_02.sp3 deleted file mode 100644 index 46b8b9b0a0..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_tc_02.sp3 +++ /dev/null @@ -1,73 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -s_mov_b32 s16, 0xa5a50001 -s_store_dword s16, s[0:1], 0x40 glc - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -//store and load s8 times -s_mov_b32 s30, s8 -s_mov_b32 s31, 0x0 - -STORE_LOOP: -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_waitcnt 0 -s_add_u32 s31, s31, 0x4 -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 STORE_LOOP - -s_mov_b32 s8, s30 -s_mov_b32 s31, 0x0 - -LOAD_LOOP: -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_waitcnt 0 -v_mov_b32 v12, v0 -s_add_u32 s31, s31, 0x4 -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 LOAD_LOOP - -s_mov_b32 s16, 0xa5a50000 -s_store_dword s16, s[0:1], 0x40 glc - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_tc_02_ea.sp3 b/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_tc_02_ea.sp3 deleted file mode 100644 index a374f8454d..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_tc_02_ea.sp3 +++ /dev/null @@ -1,71 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -//For vega20, we need to set bit 12 low to steer traffic to ea0 -s_mov_b32 s32, 0xFFFFEFFF -s_and_b32 s24, s24, s32 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -//store and load s8 times -s_mov_b32 s30, s8 -s_mov_b32 s31, 0x0 - -STORE_LOOP: -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_waitcnt 0 -s_add_u32 s31, s31, 0x4 -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 STORE_LOOP - -s_mov_b32 s8, s30 -s_mov_b32 s31, 0x0 - -LOAD_LOOP: -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_waitcnt 0 -v_mov_b32 v12, v0 -s_add_u32 s31, s31, 0x4 -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 LOAD_LOOP - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_tc_atomic_02.sp3 b/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_tc_atomic_02.sp3 deleted file mode 100644 index 70439d9d5c..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_tc_atomic_02.sp3 +++ /dev/null @@ -1,345 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -//set bit 12 low to select EA0 -s_mov_b32 s32, 0xFFFFEFFF -s_and_b32 s24, s24, s32 - -s_and_b32 s31, s9, 0x1 -s_cmpk_eq_i32 s31, 0x1 -s_cbranch_scc1 ODD_WAVES - -//set bit 12 high to select EA1 -s_mov_b32 s32, 0x1000 -s_or_b32 s24, s24, s32 - -ODD_WAVES: - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -v_mul_i32_i24 v3, v3, 0x10 -v_mul_i32_i24 v9, v9, 0x10 - -s_mov_b32 s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 - -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 - -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 - -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 - -s_barrier - -s_mov_b32 s31, 0xF0000 -s_mov_b32 s32, 0x6000 - -//store and load s8 times -s_mov_b32 s30, s8 - -s_cmpk_lt_i32 s9, 0x2 -s_cbranch_scc1 ATOMIC_LOOP - -LOAD_LOOP: - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -s_sub_u32 s30, s30, 1 -s_cmpk_eq_u32 s30, 0 -s_cbranch_scc0 LOAD_LOOP - - -s_cmpk_ge_i32 s9, 0x2 -s_cbranch_scc1 END - -ATOMIC_LOOP: - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -s_sub_u32 s30, s30, 1 -s_cmpk_eq_u32 s30, 0 -s_cbranch_scc0 ATOMIC_LOOP - -//s_waitcnt 0 - -END: -s_waitcnt 0 - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_tc_atomic_03.sp3 b/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_tc_atomic_03.sp3 deleted file mode 100644 index bd4c14ba21..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_tc_atomic_03.sp3 +++ /dev/null @@ -1,509 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -//set bit 12 low to select EA0 -s_mov_b32 s32, 0xFFFFEFFF -s_and_b32 s24, s24, s32 - -s_and_b32 s31, s9, 0x1 -s_cmpk_eq_i32 s31, 0x1 -s_cbranch_scc1 ODD_WAVES - -//set bit 12 high to select EA1 -s_mov_b32 s32, 0x1000 -s_or_b32 s24, s24, s32 - -ODD_WAVES: - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -v_mul_i32_i24 v3, v3, 0x10 -v_mul_i32_i24 v9, v9, 0x10 - -s_mov_b32 s31, 0x9000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 - -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 - -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 - -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 - -s_barrier - -s_mov_b32 s31, 0xF0000 -s_mov_b32 s32, 0x9000 - -//store and load s8 times -s_mov_b32 s30, s8 - -s_cmpk_lt_i32 s9, 0x2 -s_cbranch_scc1 ATOMIC_LOOP - -s_mov_b32 s20, 0x1 - -LOAD_LOOP: - -s_atomic_add s20, s0, 0x100000 -s_atomic_add s20, s0, 0x100010 -s_atomic_add s20, s0, 0x100020 -s_atomic_add s20, s0, 0x100030 -s_atomic_add s20, s0, 0x100040 -s_atomic_add s20, s0, 0x100050 -s_atomic_add s20, s0, 0x100060 -s_atomic_add s20, s0, 0x100070 -s_atomic_add s20, s0, 0x100080 -s_atomic_add s20, s0, 0x100090 - -s_atomic_add s20, s0, 0x100100 -s_atomic_add s20, s0, 0x100110 -s_atomic_add s20, s0, 0x100120 -s_atomic_add s20, s0, 0x100130 -s_atomic_add s20, s0, 0x100140 -s_atomic_add s20, s0, 0x100150 -s_atomic_add s20, s0, 0x100160 -s_atomic_add s20, s0, 0x100170 -s_atomic_add s20, s0, 0x100180 -s_atomic_add s20, s0, 0x100190 - -s_atomic_add s20, s0, 0x100200 -s_atomic_add s20, s0, 0x100210 -s_atomic_add s20, s0, 0x100220 -s_atomic_add s20, s0, 0x100230 -s_atomic_add s20, s0, 0x100240 -s_atomic_add s20, s0, 0x100250 -s_atomic_add s20, s0, 0x100260 -s_atomic_add s20, s0, 0x100270 -s_atomic_add s20, s0, 0x100280 -s_atomic_add s20, s0, 0x100290 - -s_atomic_add s20, s0, 0x100300 -s_atomic_add s20, s0, 0x100310 -s_atomic_add s20, s0, 0x100320 -s_atomic_add s20, s0, 0x100330 -s_atomic_add s20, s0, 0x100340 -s_atomic_add s20, s0, 0x100350 -s_atomic_add s20, s0, 0x100360 -s_atomic_add s20, s0, 0x100370 -s_atomic_add s20, s0, 0x100380 -s_atomic_add s20, s0, 0x100390 - -s_atomic_add s20, s0, 0x100400 -s_atomic_add s20, s0, 0x100404 -s_atomic_add s20, s0, 0x100408 -s_atomic_add s20, s0, 0x10040c -s_atomic_add s20, s0, 0x100410 -s_atomic_add s20, s0, 0x100414 -s_atomic_add s20, s0, 0x100418 -s_atomic_add s20, s0, 0x10041c -s_atomic_add s20, s0, 0x100420 -s_atomic_add s20, s0, 0x100424 -s_atomic_add s20, s0, 0x100428 -s_atomic_add s20, s0, 0x10042c - -s_atomic_add s20, s0, 0x100500 -s_atomic_add s20, s0, 0x100504 -s_atomic_add s20, s0, 0x100508 -s_atomic_add s20, s0, 0x10050c -s_atomic_add s20, s0, 0x100510 -s_atomic_add s20, s0, 0x100514 -s_atomic_add s20, s0, 0x100518 -s_atomic_add s20, s0, 0x10051c -s_atomic_add s20, s0, 0x100520 -s_atomic_add s20, s0, 0x100524 -s_atomic_add s20, s0, 0x100528 -s_atomic_add s20, s0, 0x10052c - -s_atomic_add s20, s0, 0x100600 -s_atomic_add s20, s0, 0x100604 -s_atomic_add s20, s0, 0x100608 -s_atomic_add s20, s0, 0x10060c -s_atomic_add s20, s0, 0x100610 -s_atomic_add s20, s0, 0x100614 -s_atomic_add s20, s0, 0x100618 -s_atomic_add s20, s0, 0x10061c -s_atomic_add s20, s0, 0x100620 -s_atomic_add s20, s0, 0x100624 -s_atomic_add s20, s0, 0x100628 -s_atomic_add s20, s0, 0x10062c - -s_atomic_add s20, s0, 0x100700 -s_atomic_add s20, s0, 0x100704 -s_atomic_add s20, s0, 0x100708 -s_atomic_add s20, s0, 0x10070c -s_atomic_add s20, s0, 0x100710 -s_atomic_add s20, s0, 0x100714 -s_atomic_add s20, s0, 0x100718 -s_atomic_add s20, s0, 0x10071c -s_atomic_add s20, s0, 0x100720 -s_atomic_add s20, s0, 0x100724 -s_atomic_add s20, s0, 0x100728 -s_atomic_add s20, s0, 0x10072c - -s_atomic_add s20, s0, 0x100800 -s_atomic_add s20, s0, 0x100804 -s_atomic_add s20, s0, 0x100808 -s_atomic_add s20, s0, 0x10080c -s_atomic_add s20, s0, 0x100810 -s_atomic_add s20, s0, 0x100814 -s_atomic_add s20, s0, 0x100818 -s_atomic_add s20, s0, 0x10081c -s_atomic_add s20, s0, 0x100820 -s_atomic_add s20, s0, 0x100824 -s_atomic_add s20, s0, 0x100828 -s_atomic_add s20, s0, 0x10082c - -s_atomic_add s20, s0, 0x100900 -s_atomic_add s20, s0, 0x100904 -s_atomic_add s20, s0, 0x100908 -s_atomic_add s20, s0, 0x10090c -s_atomic_add s20, s0, 0x100910 -s_atomic_add s20, s0, 0x100914 -s_atomic_add s20, s0, 0x100918 -s_atomic_add s20, s0, 0x10091c -s_atomic_add s20, s0, 0x100920 -s_atomic_add s20, s0, 0x100924 -s_atomic_add s20, s0, 0x100928 -s_atomic_add s20, s0, 0x10092c - -s_atomic_add s20, s0, 0x100a00 -s_atomic_add s20, s0, 0x100a04 -s_atomic_add s20, s0, 0x100a08 -s_atomic_add s20, s0, 0x100a0c -s_atomic_add s20, s0, 0x100a10 -s_atomic_add s20, s0, 0x100a14 -s_atomic_add s20, s0, 0x100a18 -s_atomic_add s20, s0, 0x100a1c -s_atomic_add s20, s0, 0x100a20 -s_atomic_add s20, s0, 0x100a24 -s_atomic_add s20, s0, 0x100a28 -s_atomic_add s20, s0, 0x100a2c - -s_atomic_add s20, s0, 0x100b00 -s_atomic_add s20, s0, 0x100b04 -s_atomic_add s20, s0, 0x100b08 -s_atomic_add s20, s0, 0x100b0c -s_atomic_add s20, s0, 0x100b10 -s_atomic_add s20, s0, 0x100b14 -s_atomic_add s20, s0, 0x100b18 -s_atomic_add s20, s0, 0x100b1c -s_atomic_add s20, s0, 0x100b20 -s_atomic_add s20, s0, 0x100b24 -s_atomic_add s20, s0, 0x100b28 -s_atomic_add s20, s0, 0x100b2c - -s_atomic_add s20, s0, 0x100c00 -s_atomic_add s20, s0, 0x100c04 -s_atomic_add s20, s0, 0x100c08 -s_atomic_add s20, s0, 0x100c0c -s_atomic_add s20, s0, 0x100c10 -s_atomic_add s20, s0, 0x100c14 -s_atomic_add s20, s0, 0x100c18 -s_atomic_add s20, s0, 0x100c1c -s_atomic_add s20, s0, 0x100c20 -s_atomic_add s20, s0, 0x100c24 -s_atomic_add s20, s0, 0x100c28 -s_atomic_add s20, s0, 0x100c2c - - -s_sub_u32 s30, s30, 1 -s_cmpk_eq_u32 s30, 0 -s_cbranch_scc0 LOAD_LOOP - - -s_cmpk_ge_i32 s9, 0x2 -s_cbranch_scc1 END - -ATOMIC_LOOP: - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -s_sub_u32 s30, s30, 1 -s_cmpk_eq_u32 s30, 0 -s_cbranch_scc0 ATOMIC_LOOP - -//s_waitcnt 0 - -END: -s_waitcnt 0 - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_tc_atomic_04.sp3 b/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_tc_atomic_04.sp3 deleted file mode 100644 index c93b2992f0..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_tc_atomic_04.sp3 +++ /dev/null @@ -1,2816 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -//set bit 12 low to select EA0 -s_mov_b32 s32, 0xFFFFEFFF -s_and_b32 s24, s24, s32 - -s_and_b32 s31, s9, 0x1 -s_cmpk_eq_i32 s31, 0x1 -s_cbranch_scc1 ODD_WAVES - -//set bit 12 high to select EA1 -s_mov_b32 s32, 0x1000 -s_or_b32 s24, s24, s32 - -ODD_WAVES: - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -v_mul_i32_i24 v3, v3, 0x10 -v_mul_i32_i24 v9, v9, 0x10 - -s_mov_b32 s31, 0x9000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 - -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 - -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 - -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 - -s_barrier - -s_mov_b32 s31, 0xF0000 -s_mov_b32 s32, 0x9000 - -//store and load s8 times -s_mov_b32 s30, s8 - -s_cmpk_lt_i32 s9, 0x2 -s_cbranch_scc1 ATOMIC_LOOP - -s_mov_b32 s20, 0x1 - -LOAD_LOOP: - -s_atomic_add s20, s0, 0x100000 -s_atomic_add s20, s0, 0x100010 -s_atomic_add s20, s0, 0x100020 -s_atomic_add s20, s0, 0x100030 -s_atomic_add s20, s0, 0x100040 -s_atomic_add s20, s0, 0x100050 -s_atomic_add s20, s0, 0x100060 -s_atomic_add s20, s0, 0x100070 -s_atomic_add s20, s0, 0x100080 -s_atomic_add s20, s0, 0x100090 - -s_atomic_add s20, s0, 0x100100 -s_atomic_add s20, s0, 0x100110 -s_atomic_add s20, s0, 0x100120 -s_atomic_add s20, s0, 0x100130 -s_atomic_add s20, s0, 0x100140 -s_atomic_add s20, s0, 0x100150 -s_atomic_add s20, s0, 0x100160 -s_atomic_add s20, s0, 0x100170 -s_atomic_add s20, s0, 0x100180 -s_atomic_add s20, s0, 0x100190 - -s_atomic_add s20, s0, 0x100200 -s_atomic_add s20, s0, 0x100210 -s_atomic_add s20, s0, 0x100220 -s_atomic_add s20, s0, 0x100230 -s_atomic_add s20, s0, 0x100240 -s_atomic_add s20, s0, 0x100250 -s_atomic_add s20, s0, 0x100260 -s_atomic_add s20, s0, 0x100270 -s_atomic_add s20, s0, 0x100280 -s_atomic_add s20, s0, 0x100290 - -s_atomic_add s20, s0, 0x100300 -s_atomic_add s20, s0, 0x100310 -s_atomic_add s20, s0, 0x100320 -s_atomic_add s20, s0, 0x100330 -s_atomic_add s20, s0, 0x100340 -s_atomic_add s20, s0, 0x100350 -s_atomic_add s20, s0, 0x100360 -s_atomic_add s20, s0, 0x100370 -s_atomic_add s20, s0, 0x100380 -s_atomic_add s20, s0, 0x100390 - -s_atomic_add s20, s0, 0x100400 -s_atomic_add s20, s0, 0x100404 -s_atomic_add s20, s0, 0x100408 -s_atomic_add s20, s0, 0x10040c -s_atomic_add s20, s0, 0x100410 -s_atomic_add s20, s0, 0x100414 -s_atomic_add s20, s0, 0x100418 -s_atomic_add s20, s0, 0x10041c -s_atomic_add s20, s0, 0x100420 -s_atomic_add s20, s0, 0x100424 -s_atomic_add s20, s0, 0x100428 -s_atomic_add s20, s0, 0x10042c - -s_atomic_add s20, s0, 0x100500 -s_atomic_add s20, s0, 0x100504 -s_atomic_add s20, s0, 0x100508 -s_atomic_add s20, s0, 0x10050c -s_atomic_add s20, s0, 0x100510 -s_atomic_add s20, s0, 0x100514 -s_atomic_add s20, s0, 0x100518 -s_atomic_add s20, s0, 0x10051c -s_atomic_add s20, s0, 0x100520 -s_atomic_add s20, s0, 0x100524 -s_atomic_add s20, s0, 0x100528 -s_atomic_add s20, s0, 0x10052c - -s_atomic_add s20, s0, 0x100600 -s_atomic_add s20, s0, 0x100604 -s_atomic_add s20, s0, 0x100608 -s_atomic_add s20, s0, 0x10060c -s_atomic_add s20, s0, 0x100610 -s_atomic_add s20, s0, 0x100614 -s_atomic_add s20, s0, 0x100618 -s_atomic_add s20, s0, 0x10061c -s_atomic_add s20, s0, 0x100620 -s_atomic_add s20, s0, 0x100624 -s_atomic_add s20, s0, 0x100628 -s_atomic_add s20, s0, 0x10062c - -s_atomic_add s20, s0, 0x100700 -s_atomic_add s20, s0, 0x100704 -s_atomic_add s20, s0, 0x100708 -s_atomic_add s20, s0, 0x10070c -s_atomic_add s20, s0, 0x100710 -s_atomic_add s20, s0, 0x100714 -s_atomic_add s20, s0, 0x100718 -s_atomic_add s20, s0, 0x10071c -s_atomic_add s20, s0, 0x100720 -s_atomic_add s20, s0, 0x100724 -s_atomic_add s20, s0, 0x100728 -s_atomic_add s20, s0, 0x10072c - -s_atomic_add s20, s0, 0x100800 -s_atomic_add s20, s0, 0x100804 -s_atomic_add s20, s0, 0x100808 -s_atomic_add s20, s0, 0x10080c -s_atomic_add s20, s0, 0x100810 -s_atomic_add s20, s0, 0x100814 -s_atomic_add s20, s0, 0x100818 -s_atomic_add s20, s0, 0x10081c -s_atomic_add s20, s0, 0x100820 -s_atomic_add s20, s0, 0x100824 -s_atomic_add s20, s0, 0x100828 -s_atomic_add s20, s0, 0x10082c - -s_atomic_add s20, s0, 0x100900 -s_atomic_add s20, s0, 0x100904 -s_atomic_add s20, s0, 0x100908 -s_atomic_add s20, s0, 0x10090c -s_atomic_add s20, s0, 0x100910 -s_atomic_add s20, s0, 0x100914 -s_atomic_add s20, s0, 0x100918 -s_atomic_add s20, s0, 0x10091c -s_atomic_add s20, s0, 0x100920 -s_atomic_add s20, s0, 0x100924 -s_atomic_add s20, s0, 0x100928 -s_atomic_add s20, s0, 0x10092c - -s_atomic_add s20, s0, 0x100a00 -s_atomic_add s20, s0, 0x100a04 -s_atomic_add s20, s0, 0x100a08 -s_atomic_add s20, s0, 0x100a0c -s_atomic_add s20, s0, 0x100a10 -s_atomic_add s20, s0, 0x100a14 -s_atomic_add s20, s0, 0x100a18 -s_atomic_add s20, s0, 0x100a1c -s_atomic_add s20, s0, 0x100a20 -s_atomic_add s20, s0, 0x100a24 -s_atomic_add s20, s0, 0x100a28 -s_atomic_add s20, s0, 0x100a2c - -s_atomic_add s20, s0, 0x100b00 -s_atomic_add s20, s0, 0x100b04 -s_atomic_add s20, s0, 0x100b08 -s_atomic_add s20, s0, 0x100b0c -s_atomic_add s20, s0, 0x100b10 -s_atomic_add s20, s0, 0x100b14 -s_atomic_add s20, s0, 0x100b18 -s_atomic_add s20, s0, 0x100b1c -s_atomic_add s20, s0, 0x100b20 -s_atomic_add s20, s0, 0x100b24 -s_atomic_add s20, s0, 0x100b28 -s_atomic_add s20, s0, 0x100b2c - -s_atomic_add s20, s0, 0x100c00 -s_atomic_add s20, s0, 0x100c04 -s_atomic_add s20, s0, 0x100c08 -s_atomic_add s20, s0, 0x100c0c -s_atomic_add s20, s0, 0x100c10 -s_atomic_add s20, s0, 0x100c14 -s_atomic_add s20, s0, 0x100c18 -s_atomic_add s20, s0, 0x100c1c -s_atomic_add s20, s0, 0x100c20 -s_atomic_add s20, s0, 0x100c24 -s_atomic_add s20, s0, 0x100c28 -s_atomic_add s20, s0, 0x100c2c - -s_atomic_add s20, s0, 0x101000 -s_atomic_add s20, s0, 0x101010 -s_atomic_add s20, s0, 0x101020 -s_atomic_add s20, s0, 0x101030 -s_atomic_add s20, s0, 0x101040 -s_atomic_add s20, s0, 0x101050 -s_atomic_add s20, s0, 0x101060 -s_atomic_add s20, s0, 0x101070 -s_atomic_add s20, s0, 0x101080 -s_atomic_add s20, s0, 0x101090 - -s_atomic_add s20, s0, 0x101100 -s_atomic_add s20, s0, 0x101110 -s_atomic_add s20, s0, 0x101120 -s_atomic_add s20, s0, 0x101130 -s_atomic_add s20, s0, 0x101140 -s_atomic_add s20, s0, 0x101150 -s_atomic_add s20, s0, 0x101160 -s_atomic_add s20, s0, 0x101170 -s_atomic_add s20, s0, 0x101180 -s_atomic_add s20, s0, 0x101190 - -s_atomic_add s20, s0, 0x101200 -s_atomic_add s20, s0, 0x101210 -s_atomic_add s20, s0, 0x101220 -s_atomic_add s20, s0, 0x101230 -s_atomic_add s20, s0, 0x101240 -s_atomic_add s20, s0, 0x101250 -s_atomic_add s20, s0, 0x101260 -s_atomic_add s20, s0, 0x101270 -s_atomic_add s20, s0, 0x101280 -s_atomic_add s20, s0, 0x101290 - -s_atomic_add s20, s0, 0x101300 -s_atomic_add s20, s0, 0x101310 -s_atomic_add s20, s0, 0x101320 -s_atomic_add s20, s0, 0x101330 -s_atomic_add s20, s0, 0x101340 -s_atomic_add s20, s0, 0x101350 -s_atomic_add s20, s0, 0x101360 -s_atomic_add s20, s0, 0x101370 -s_atomic_add s20, s0, 0x101380 -s_atomic_add s20, s0, 0x101390 - -s_atomic_add s20, s0, 0x101400 -s_atomic_add s20, s0, 0x101404 -s_atomic_add s20, s0, 0x101408 -s_atomic_add s20, s0, 0x10140c -s_atomic_add s20, s0, 0x101410 -s_atomic_add s20, s0, 0x101414 -s_atomic_add s20, s0, 0x101418 -s_atomic_add s20, s0, 0x10141c -s_atomic_add s20, s0, 0x101420 -s_atomic_add s20, s0, 0x101424 -s_atomic_add s20, s0, 0x101428 -s_atomic_add s20, s0, 0x10142c - -s_atomic_add s20, s0, 0x101500 -s_atomic_add s20, s0, 0x101504 -s_atomic_add s20, s0, 0x101508 -s_atomic_add s20, s0, 0x10150c -s_atomic_add s20, s0, 0x101510 -s_atomic_add s20, s0, 0x101514 -s_atomic_add s20, s0, 0x101518 -s_atomic_add s20, s0, 0x10151c -s_atomic_add s20, s0, 0x101520 -s_atomic_add s20, s0, 0x101524 -s_atomic_add s20, s0, 0x101528 -s_atomic_add s20, s0, 0x10152c - -s_atomic_add s20, s0, 0x101600 -s_atomic_add s20, s0, 0x101604 -s_atomic_add s20, s0, 0x101608 -s_atomic_add s20, s0, 0x10160c -s_atomic_add s20, s0, 0x101610 -s_atomic_add s20, s0, 0x101614 -s_atomic_add s20, s0, 0x101618 -s_atomic_add s20, s0, 0x10161c -s_atomic_add s20, s0, 0x101620 -s_atomic_add s20, s0, 0x101624 -s_atomic_add s20, s0, 0x101628 -s_atomic_add s20, s0, 0x10162c - -s_atomic_add s20, s0, 0x101700 -s_atomic_add s20, s0, 0x101704 -s_atomic_add s20, s0, 0x101708 -s_atomic_add s20, s0, 0x10170c -s_atomic_add s20, s0, 0x101710 -s_atomic_add s20, s0, 0x101714 -s_atomic_add s20, s0, 0x101718 -s_atomic_add s20, s0, 0x10171c -s_atomic_add s20, s0, 0x101720 -s_atomic_add s20, s0, 0x101724 -s_atomic_add s20, s0, 0x101728 -s_atomic_add s20, s0, 0x10172c - -s_atomic_add s20, s0, 0x101800 -s_atomic_add s20, s0, 0x101804 -s_atomic_add s20, s0, 0x101808 -s_atomic_add s20, s0, 0x10180c -s_atomic_add s20, s0, 0x101810 -s_atomic_add s20, s0, 0x101814 -s_atomic_add s20, s0, 0x101818 -s_atomic_add s20, s0, 0x10181c -s_atomic_add s20, s0, 0x101820 -s_atomic_add s20, s0, 0x101824 -s_atomic_add s20, s0, 0x101828 -s_atomic_add s20, s0, 0x10182c - -s_atomic_add s20, s0, 0x101900 -s_atomic_add s20, s0, 0x101904 -s_atomic_add s20, s0, 0x101908 -s_atomic_add s20, s0, 0x10190c -s_atomic_add s20, s0, 0x101910 -s_atomic_add s20, s0, 0x101914 -s_atomic_add s20, s0, 0x101918 -s_atomic_add s20, s0, 0x10191c -s_atomic_add s20, s0, 0x101920 -s_atomic_add s20, s0, 0x101924 -s_atomic_add s20, s0, 0x101928 -s_atomic_add s20, s0, 0x10192c - -s_atomic_add s20, s0, 0x101a00 -s_atomic_add s20, s0, 0x101a04 -s_atomic_add s20, s0, 0x101a08 -s_atomic_add s20, s0, 0x101a0c -s_atomic_add s20, s0, 0x101a10 -s_atomic_add s20, s0, 0x101a14 -s_atomic_add s20, s0, 0x101a18 -s_atomic_add s20, s0, 0x101a1c -s_atomic_add s20, s0, 0x101a20 -s_atomic_add s20, s0, 0x101a24 -s_atomic_add s20, s0, 0x101a28 -s_atomic_add s20, s0, 0x101a2c - -s_atomic_add s20, s0, 0x101b00 -s_atomic_add s20, s0, 0x101b04 -s_atomic_add s20, s0, 0x101b08 -s_atomic_add s20, s0, 0x101b0c -s_atomic_add s20, s0, 0x101b10 -s_atomic_add s20, s0, 0x101b14 -s_atomic_add s20, s0, 0x101b18 -s_atomic_add s20, s0, 0x101b1c -s_atomic_add s20, s0, 0x101b20 -s_atomic_add s20, s0, 0x101b24 -s_atomic_add s20, s0, 0x101b28 -s_atomic_add s20, s0, 0x101b2c - -s_atomic_add s20, s0, 0x101c00 -s_atomic_add s20, s0, 0x101c04 -s_atomic_add s20, s0, 0x101c08 -s_atomic_add s20, s0, 0x101c0c -s_atomic_add s20, s0, 0x101c10 -s_atomic_add s20, s0, 0x101c14 -s_atomic_add s20, s0, 0x101c18 -s_atomic_add s20, s0, 0x101c1c -s_atomic_add s20, s0, 0x101c20 -s_atomic_add s20, s0, 0x101c24 -s_atomic_add s20, s0, 0x101c28 -s_atomic_add s20, s0, 0x101c2c - -s_atomic_add s20, s0, 0x100000 -s_atomic_add s20, s0, 0x100010 -s_atomic_add s20, s0, 0x100020 -s_atomic_add s20, s0, 0x100030 -s_atomic_add s20, s0, 0x100040 -s_atomic_add s20, s0, 0x100050 -s_atomic_add s20, s0, 0x100060 -s_atomic_add s20, s0, 0x100070 -s_atomic_add s20, s0, 0x100080 -s_atomic_add s20, s0, 0x100090 - -s_atomic_add s20, s0, 0x100100 -s_atomic_add s20, s0, 0x100110 -s_atomic_add s20, s0, 0x100120 -s_atomic_add s20, s0, 0x100130 -s_atomic_add s20, s0, 0x100140 -s_atomic_add s20, s0, 0x100150 -s_atomic_add s20, s0, 0x100160 -s_atomic_add s20, s0, 0x100170 -s_atomic_add s20, s0, 0x100180 -s_atomic_add s20, s0, 0x100190 - -s_atomic_add s20, s0, 0x100200 -s_atomic_add s20, s0, 0x100210 -s_atomic_add s20, s0, 0x100220 -s_atomic_add s20, s0, 0x100230 -s_atomic_add s20, s0, 0x100240 -s_atomic_add s20, s0, 0x100250 -s_atomic_add s20, s0, 0x100260 -s_atomic_add s20, s0, 0x100270 -s_atomic_add s20, s0, 0x100280 -s_atomic_add s20, s0, 0x100290 - -s_atomic_add s20, s0, 0x100300 -s_atomic_add s20, s0, 0x100310 -s_atomic_add s20, s0, 0x100320 -s_atomic_add s20, s0, 0x100330 -s_atomic_add s20, s0, 0x100340 -s_atomic_add s20, s0, 0x100350 -s_atomic_add s20, s0, 0x100360 -s_atomic_add s20, s0, 0x100370 -s_atomic_add s20, s0, 0x100380 -s_atomic_add s20, s0, 0x100390 - -s_atomic_add s20, s0, 0x100400 -s_atomic_add s20, s0, 0x100404 -s_atomic_add s20, s0, 0x100408 -s_atomic_add s20, s0, 0x10040c -s_atomic_add s20, s0, 0x100410 -s_atomic_add s20, s0, 0x100414 -s_atomic_add s20, s0, 0x100418 -s_atomic_add s20, s0, 0x10041c -s_atomic_add s20, s0, 0x100420 -s_atomic_add s20, s0, 0x100424 -s_atomic_add s20, s0, 0x100428 -s_atomic_add s20, s0, 0x10042c - -s_atomic_add s20, s0, 0x100500 -s_atomic_add s20, s0, 0x100504 -s_atomic_add s20, s0, 0x100508 -s_atomic_add s20, s0, 0x10050c -s_atomic_add s20, s0, 0x100510 -s_atomic_add s20, s0, 0x100514 -s_atomic_add s20, s0, 0x100518 -s_atomic_add s20, s0, 0x10051c -s_atomic_add s20, s0, 0x100520 -s_atomic_add s20, s0, 0x100524 -s_atomic_add s20, s0, 0x100528 -s_atomic_add s20, s0, 0x10052c - -s_atomic_add s20, s0, 0x100600 -s_atomic_add s20, s0, 0x100604 -s_atomic_add s20, s0, 0x100608 -s_atomic_add s20, s0, 0x10060c -s_atomic_add s20, s0, 0x100610 -s_atomic_add s20, s0, 0x100614 -s_atomic_add s20, s0, 0x100618 -s_atomic_add s20, s0, 0x10061c -s_atomic_add s20, s0, 0x100620 -s_atomic_add s20, s0, 0x100624 -s_atomic_add s20, s0, 0x100628 -s_atomic_add s20, s0, 0x10062c - -s_atomic_add s20, s0, 0x100700 -s_atomic_add s20, s0, 0x100704 -s_atomic_add s20, s0, 0x100708 -s_atomic_add s20, s0, 0x10070c -s_atomic_add s20, s0, 0x100710 -s_atomic_add s20, s0, 0x100714 -s_atomic_add s20, s0, 0x100718 -s_atomic_add s20, s0, 0x10071c -s_atomic_add s20, s0, 0x100720 -s_atomic_add s20, s0, 0x100724 -s_atomic_add s20, s0, 0x100728 -s_atomic_add s20, s0, 0x10072c - -s_atomic_add s20, s0, 0x100800 -s_atomic_add s20, s0, 0x100804 -s_atomic_add s20, s0, 0x100808 -s_atomic_add s20, s0, 0x10080c -s_atomic_add s20, s0, 0x100810 -s_atomic_add s20, s0, 0x100814 -s_atomic_add s20, s0, 0x100818 -s_atomic_add s20, s0, 0x10081c -s_atomic_add s20, s0, 0x100820 -s_atomic_add s20, s0, 0x100824 -s_atomic_add s20, s0, 0x100828 -s_atomic_add s20, s0, 0x10082c - -s_atomic_add s20, s0, 0x100900 -s_atomic_add s20, s0, 0x100904 -s_atomic_add s20, s0, 0x100908 -s_atomic_add s20, s0, 0x10090c -s_atomic_add s20, s0, 0x100910 -s_atomic_add s20, s0, 0x100914 -s_atomic_add s20, s0, 0x100918 -s_atomic_add s20, s0, 0x10091c -s_atomic_add s20, s0, 0x100920 -s_atomic_add s20, s0, 0x100924 -s_atomic_add s20, s0, 0x100928 -s_atomic_add s20, s0, 0x10092c - -s_atomic_add s20, s0, 0x100a00 -s_atomic_add s20, s0, 0x100a04 -s_atomic_add s20, s0, 0x100a08 -s_atomic_add s20, s0, 0x100a0c -s_atomic_add s20, s0, 0x100a10 -s_atomic_add s20, s0, 0x100a14 -s_atomic_add s20, s0, 0x100a18 -s_atomic_add s20, s0, 0x100a1c -s_atomic_add s20, s0, 0x100a20 -s_atomic_add s20, s0, 0x100a24 -s_atomic_add s20, s0, 0x100a28 -s_atomic_add s20, s0, 0x100a2c - -s_atomic_add s20, s0, 0x100b00 -s_atomic_add s20, s0, 0x100b04 -s_atomic_add s20, s0, 0x100b08 -s_atomic_add s20, s0, 0x100b0c -s_atomic_add s20, s0, 0x100b10 -s_atomic_add s20, s0, 0x100b14 -s_atomic_add s20, s0, 0x100b18 -s_atomic_add s20, s0, 0x100b1c -s_atomic_add s20, s0, 0x100b20 -s_atomic_add s20, s0, 0x100b24 -s_atomic_add s20, s0, 0x100b28 -s_atomic_add s20, s0, 0x100b2c - -s_atomic_add s20, s0, 0x100c00 -s_atomic_add s20, s0, 0x100c04 -s_atomic_add s20, s0, 0x100c08 -s_atomic_add s20, s0, 0x100c0c -s_atomic_add s20, s0, 0x100c10 -s_atomic_add s20, s0, 0x100c14 -s_atomic_add s20, s0, 0x100c18 -s_atomic_add s20, s0, 0x100c1c -s_atomic_add s20, s0, 0x100c20 -s_atomic_add s20, s0, 0x100c24 -s_atomic_add s20, s0, 0x100c28 -s_atomic_add s20, s0, 0x100c2c - -s_atomic_add s20, s0, 0x101000 -s_atomic_add s20, s0, 0x101010 -s_atomic_add s20, s0, 0x101020 -s_atomic_add s20, s0, 0x101030 -s_atomic_add s20, s0, 0x101040 -s_atomic_add s20, s0, 0x101050 -s_atomic_add s20, s0, 0x101060 -s_atomic_add s20, s0, 0x101070 -s_atomic_add s20, s0, 0x101080 -s_atomic_add s20, s0, 0x101090 - -s_atomic_add s20, s0, 0x101100 -s_atomic_add s20, s0, 0x101110 -s_atomic_add s20, s0, 0x101120 -s_atomic_add s20, s0, 0x101130 -s_atomic_add s20, s0, 0x101140 -s_atomic_add s20, s0, 0x101150 -s_atomic_add s20, s0, 0x101160 -s_atomic_add s20, s0, 0x101170 -s_atomic_add s20, s0, 0x101180 -s_atomic_add s20, s0, 0x101190 - -s_atomic_add s20, s0, 0x101200 -s_atomic_add s20, s0, 0x101210 -s_atomic_add s20, s0, 0x101220 -s_atomic_add s20, s0, 0x101230 -s_atomic_add s20, s0, 0x101240 -s_atomic_add s20, s0, 0x101250 -s_atomic_add s20, s0, 0x101260 -s_atomic_add s20, s0, 0x101270 -s_atomic_add s20, s0, 0x101280 -s_atomic_add s20, s0, 0x101290 - -s_atomic_add s20, s0, 0x101300 -s_atomic_add s20, s0, 0x101310 -s_atomic_add s20, s0, 0x101320 -s_atomic_add s20, s0, 0x101330 -s_atomic_add s20, s0, 0x101340 -s_atomic_add s20, s0, 0x101350 -s_atomic_add s20, s0, 0x101360 -s_atomic_add s20, s0, 0x101370 -s_atomic_add s20, s0, 0x101380 -s_atomic_add s20, s0, 0x101390 - -s_atomic_add s20, s0, 0x101400 -s_atomic_add s20, s0, 0x101404 -s_atomic_add s20, s0, 0x101408 -s_atomic_add s20, s0, 0x10140c -s_atomic_add s20, s0, 0x101410 -s_atomic_add s20, s0, 0x101414 -s_atomic_add s20, s0, 0x101418 -s_atomic_add s20, s0, 0x10141c -s_atomic_add s20, s0, 0x101420 -s_atomic_add s20, s0, 0x101424 -s_atomic_add s20, s0, 0x101428 -s_atomic_add s20, s0, 0x10142c - -s_atomic_add s20, s0, 0x101500 -s_atomic_add s20, s0, 0x101504 -s_atomic_add s20, s0, 0x101508 -s_atomic_add s20, s0, 0x10150c -s_atomic_add s20, s0, 0x101510 -s_atomic_add s20, s0, 0x101514 -s_atomic_add s20, s0, 0x101518 -s_atomic_add s20, s0, 0x10151c -s_atomic_add s20, s0, 0x101520 -s_atomic_add s20, s0, 0x101524 -s_atomic_add s20, s0, 0x101528 -s_atomic_add s20, s0, 0x10152c - -s_atomic_add s20, s0, 0x101600 -s_atomic_add s20, s0, 0x101604 -s_atomic_add s20, s0, 0x101608 -s_atomic_add s20, s0, 0x10160c -s_atomic_add s20, s0, 0x101610 -s_atomic_add s20, s0, 0x101614 -s_atomic_add s20, s0, 0x101618 -s_atomic_add s20, s0, 0x10161c -s_atomic_add s20, s0, 0x101620 -s_atomic_add s20, s0, 0x101624 -s_atomic_add s20, s0, 0x101628 -s_atomic_add s20, s0, 0x10162c - -s_atomic_add s20, s0, 0x101700 -s_atomic_add s20, s0, 0x101704 -s_atomic_add s20, s0, 0x101708 -s_atomic_add s20, s0, 0x10170c -s_atomic_add s20, s0, 0x101710 -s_atomic_add s20, s0, 0x101714 -s_atomic_add s20, s0, 0x101718 -s_atomic_add s20, s0, 0x10171c -s_atomic_add s20, s0, 0x101720 -s_atomic_add s20, s0, 0x101724 -s_atomic_add s20, s0, 0x101728 -s_atomic_add s20, s0, 0x10172c - -s_atomic_add s20, s0, 0x101800 -s_atomic_add s20, s0, 0x101804 -s_atomic_add s20, s0, 0x101808 -s_atomic_add s20, s0, 0x10180c -s_atomic_add s20, s0, 0x101810 -s_atomic_add s20, s0, 0x101814 -s_atomic_add s20, s0, 0x101818 -s_atomic_add s20, s0, 0x10181c -s_atomic_add s20, s0, 0x101820 -s_atomic_add s20, s0, 0x101824 -s_atomic_add s20, s0, 0x101828 -s_atomic_add s20, s0, 0x10182c - -s_atomic_add s20, s0, 0x101900 -s_atomic_add s20, s0, 0x101904 -s_atomic_add s20, s0, 0x101908 -s_atomic_add s20, s0, 0x10190c -s_atomic_add s20, s0, 0x101910 -s_atomic_add s20, s0, 0x101914 -s_atomic_add s20, s0, 0x101918 -s_atomic_add s20, s0, 0x10191c -s_atomic_add s20, s0, 0x101920 -s_atomic_add s20, s0, 0x101924 -s_atomic_add s20, s0, 0x101928 -s_atomic_add s20, s0, 0x10192c - -s_atomic_add s20, s0, 0x101a00 -s_atomic_add s20, s0, 0x101a04 -s_atomic_add s20, s0, 0x101a08 -s_atomic_add s20, s0, 0x101a0c -s_atomic_add s20, s0, 0x101a10 -s_atomic_add s20, s0, 0x101a14 -s_atomic_add s20, s0, 0x101a18 -s_atomic_add s20, s0, 0x101a1c -s_atomic_add s20, s0, 0x101a20 -s_atomic_add s20, s0, 0x101a24 -s_atomic_add s20, s0, 0x101a28 -s_atomic_add s20, s0, 0x101a2c - -s_atomic_add s20, s0, 0x101b00 -s_atomic_add s20, s0, 0x101b04 -s_atomic_add s20, s0, 0x101b08 -s_atomic_add s20, s0, 0x101b0c -s_atomic_add s20, s0, 0x101b10 -s_atomic_add s20, s0, 0x101b14 -s_atomic_add s20, s0, 0x101b18 -s_atomic_add s20, s0, 0x101b1c -s_atomic_add s20, s0, 0x101b20 -s_atomic_add s20, s0, 0x101b24 -s_atomic_add s20, s0, 0x101b28 -s_atomic_add s20, s0, 0x101b2c - -s_atomic_add s20, s0, 0x101c00 -s_atomic_add s20, s0, 0x101c04 -s_atomic_add s20, s0, 0x101c08 -s_atomic_add s20, s0, 0x101c0c -s_atomic_add s20, s0, 0x101c10 -s_atomic_add s20, s0, 0x101c14 -s_atomic_add s20, s0, 0x101c18 -s_atomic_add s20, s0, 0x101c1c -s_atomic_add s20, s0, 0x101c20 -s_atomic_add s20, s0, 0x101c24 -s_atomic_add s20, s0, 0x101c28 -s_atomic_add s20, s0, 0x101c2c - -s_atomic_add s20, s0, 0x100000 -s_atomic_add s20, s0, 0x100010 -s_atomic_add s20, s0, 0x100020 -s_atomic_add s20, s0, 0x100030 -s_atomic_add s20, s0, 0x100040 -s_atomic_add s20, s0, 0x100050 -s_atomic_add s20, s0, 0x100060 -s_atomic_add s20, s0, 0x100070 -s_atomic_add s20, s0, 0x100080 -s_atomic_add s20, s0, 0x100090 - -s_atomic_add s20, s0, 0x100100 -s_atomic_add s20, s0, 0x100110 -s_atomic_add s20, s0, 0x100120 -s_atomic_add s20, s0, 0x100130 -s_atomic_add s20, s0, 0x100140 -s_atomic_add s20, s0, 0x100150 -s_atomic_add s20, s0, 0x100160 -s_atomic_add s20, s0, 0x100170 -s_atomic_add s20, s0, 0x100180 -s_atomic_add s20, s0, 0x100190 - -s_atomic_add s20, s0, 0x100200 -s_atomic_add s20, s0, 0x100210 -s_atomic_add s20, s0, 0x100220 -s_atomic_add s20, s0, 0x100230 -s_atomic_add s20, s0, 0x100240 -s_atomic_add s20, s0, 0x100250 -s_atomic_add s20, s0, 0x100260 -s_atomic_add s20, s0, 0x100270 -s_atomic_add s20, s0, 0x100280 -s_atomic_add s20, s0, 0x100290 - -s_atomic_add s20, s0, 0x100300 -s_atomic_add s20, s0, 0x100310 -s_atomic_add s20, s0, 0x100320 -s_atomic_add s20, s0, 0x100330 -s_atomic_add s20, s0, 0x100340 -s_atomic_add s20, s0, 0x100350 -s_atomic_add s20, s0, 0x100360 -s_atomic_add s20, s0, 0x100370 -s_atomic_add s20, s0, 0x100380 -s_atomic_add s20, s0, 0x100390 - -s_atomic_add s20, s0, 0x100400 -s_atomic_add s20, s0, 0x100404 -s_atomic_add s20, s0, 0x100408 -s_atomic_add s20, s0, 0x10040c -s_atomic_add s20, s0, 0x100410 -s_atomic_add s20, s0, 0x100414 -s_atomic_add s20, s0, 0x100418 -s_atomic_add s20, s0, 0x10041c -s_atomic_add s20, s0, 0x100420 -s_atomic_add s20, s0, 0x100424 -s_atomic_add s20, s0, 0x100428 -s_atomic_add s20, s0, 0x10042c - -s_atomic_add s20, s0, 0x100500 -s_atomic_add s20, s0, 0x100504 -s_atomic_add s20, s0, 0x100508 -s_atomic_add s20, s0, 0x10050c -s_atomic_add s20, s0, 0x100510 -s_atomic_add s20, s0, 0x100514 -s_atomic_add s20, s0, 0x100518 -s_atomic_add s20, s0, 0x10051c -s_atomic_add s20, s0, 0x100520 -s_atomic_add s20, s0, 0x100524 -s_atomic_add s20, s0, 0x100528 -s_atomic_add s20, s0, 0x10052c - -s_atomic_add s20, s0, 0x100600 -s_atomic_add s20, s0, 0x100604 -s_atomic_add s20, s0, 0x100608 -s_atomic_add s20, s0, 0x10060c -s_atomic_add s20, s0, 0x100610 -s_atomic_add s20, s0, 0x100614 -s_atomic_add s20, s0, 0x100618 -s_atomic_add s20, s0, 0x10061c -s_atomic_add s20, s0, 0x100620 -s_atomic_add s20, s0, 0x100624 -s_atomic_add s20, s0, 0x100628 -s_atomic_add s20, s0, 0x10062c - -s_atomic_add s20, s0, 0x100700 -s_atomic_add s20, s0, 0x100704 -s_atomic_add s20, s0, 0x100708 -s_atomic_add s20, s0, 0x10070c -s_atomic_add s20, s0, 0x100710 -s_atomic_add s20, s0, 0x100714 -s_atomic_add s20, s0, 0x100718 -s_atomic_add s20, s0, 0x10071c -s_atomic_add s20, s0, 0x100720 -s_atomic_add s20, s0, 0x100724 -s_atomic_add s20, s0, 0x100728 -s_atomic_add s20, s0, 0x10072c - -s_atomic_add s20, s0, 0x100800 -s_atomic_add s20, s0, 0x100804 -s_atomic_add s20, s0, 0x100808 -s_atomic_add s20, s0, 0x10080c -s_atomic_add s20, s0, 0x100810 -s_atomic_add s20, s0, 0x100814 -s_atomic_add s20, s0, 0x100818 -s_atomic_add s20, s0, 0x10081c -s_atomic_add s20, s0, 0x100820 -s_atomic_add s20, s0, 0x100824 -s_atomic_add s20, s0, 0x100828 -s_atomic_add s20, s0, 0x10082c - -s_atomic_add s20, s0, 0x100900 -s_atomic_add s20, s0, 0x100904 -s_atomic_add s20, s0, 0x100908 -s_atomic_add s20, s0, 0x10090c -s_atomic_add s20, s0, 0x100910 -s_atomic_add s20, s0, 0x100914 -s_atomic_add s20, s0, 0x100918 -s_atomic_add s20, s0, 0x10091c -s_atomic_add s20, s0, 0x100920 -s_atomic_add s20, s0, 0x100924 -s_atomic_add s20, s0, 0x100928 -s_atomic_add s20, s0, 0x10092c - -s_atomic_add s20, s0, 0x100a00 -s_atomic_add s20, s0, 0x100a04 -s_atomic_add s20, s0, 0x100a08 -s_atomic_add s20, s0, 0x100a0c -s_atomic_add s20, s0, 0x100a10 -s_atomic_add s20, s0, 0x100a14 -s_atomic_add s20, s0, 0x100a18 -s_atomic_add s20, s0, 0x100a1c -s_atomic_add s20, s0, 0x100a20 -s_atomic_add s20, s0, 0x100a24 -s_atomic_add s20, s0, 0x100a28 -s_atomic_add s20, s0, 0x100a2c - -s_atomic_add s20, s0, 0x100b00 -s_atomic_add s20, s0, 0x100b04 -s_atomic_add s20, s0, 0x100b08 -s_atomic_add s20, s0, 0x100b0c -s_atomic_add s20, s0, 0x100b10 -s_atomic_add s20, s0, 0x100b14 -s_atomic_add s20, s0, 0x100b18 -s_atomic_add s20, s0, 0x100b1c -s_atomic_add s20, s0, 0x100b20 -s_atomic_add s20, s0, 0x100b24 -s_atomic_add s20, s0, 0x100b28 -s_atomic_add s20, s0, 0x100b2c - -s_atomic_add s20, s0, 0x100c00 -s_atomic_add s20, s0, 0x100c04 -s_atomic_add s20, s0, 0x100c08 -s_atomic_add s20, s0, 0x100c0c -s_atomic_add s20, s0, 0x100c10 -s_atomic_add s20, s0, 0x100c14 -s_atomic_add s20, s0, 0x100c18 -s_atomic_add s20, s0, 0x100c1c -s_atomic_add s20, s0, 0x100c20 -s_atomic_add s20, s0, 0x100c24 -s_atomic_add s20, s0, 0x100c28 -s_atomic_add s20, s0, 0x100c2c - -s_atomic_add s20, s0, 0x101000 -s_atomic_add s20, s0, 0x101010 -s_atomic_add s20, s0, 0x101020 -s_atomic_add s20, s0, 0x101030 -s_atomic_add s20, s0, 0x101040 -s_atomic_add s20, s0, 0x101050 -s_atomic_add s20, s0, 0x101060 -s_atomic_add s20, s0, 0x101070 -s_atomic_add s20, s0, 0x101080 -s_atomic_add s20, s0, 0x101090 - -s_atomic_add s20, s0, 0x101100 -s_atomic_add s20, s0, 0x101110 -s_atomic_add s20, s0, 0x101120 -s_atomic_add s20, s0, 0x101130 -s_atomic_add s20, s0, 0x101140 -s_atomic_add s20, s0, 0x101150 -s_atomic_add s20, s0, 0x101160 -s_atomic_add s20, s0, 0x101170 -s_atomic_add s20, s0, 0x101180 -s_atomic_add s20, s0, 0x101190 - -s_atomic_add s20, s0, 0x101200 -s_atomic_add s20, s0, 0x101210 -s_atomic_add s20, s0, 0x101220 -s_atomic_add s20, s0, 0x101230 -s_atomic_add s20, s0, 0x101240 -s_atomic_add s20, s0, 0x101250 -s_atomic_add s20, s0, 0x101260 -s_atomic_add s20, s0, 0x101270 -s_atomic_add s20, s0, 0x101280 -s_atomic_add s20, s0, 0x101290 - -s_atomic_add s20, s0, 0x101300 -s_atomic_add s20, s0, 0x101310 -s_atomic_add s20, s0, 0x101320 -s_atomic_add s20, s0, 0x101330 -s_atomic_add s20, s0, 0x101340 -s_atomic_add s20, s0, 0x101350 -s_atomic_add s20, s0, 0x101360 -s_atomic_add s20, s0, 0x101370 -s_atomic_add s20, s0, 0x101380 -s_atomic_add s20, s0, 0x101390 - -s_atomic_add s20, s0, 0x101400 -s_atomic_add s20, s0, 0x101404 -s_atomic_add s20, s0, 0x101408 -s_atomic_add s20, s0, 0x10140c -s_atomic_add s20, s0, 0x101410 -s_atomic_add s20, s0, 0x101414 -s_atomic_add s20, s0, 0x101418 -s_atomic_add s20, s0, 0x10141c -s_atomic_add s20, s0, 0x101420 -s_atomic_add s20, s0, 0x101424 -s_atomic_add s20, s0, 0x101428 -s_atomic_add s20, s0, 0x10142c - -s_atomic_add s20, s0, 0x101500 -s_atomic_add s20, s0, 0x101504 -s_atomic_add s20, s0, 0x101508 -s_atomic_add s20, s0, 0x10150c -s_atomic_add s20, s0, 0x101510 -s_atomic_add s20, s0, 0x101514 -s_atomic_add s20, s0, 0x101518 -s_atomic_add s20, s0, 0x10151c -s_atomic_add s20, s0, 0x101520 -s_atomic_add s20, s0, 0x101524 -s_atomic_add s20, s0, 0x101528 -s_atomic_add s20, s0, 0x10152c - -s_atomic_add s20, s0, 0x101600 -s_atomic_add s20, s0, 0x101604 -s_atomic_add s20, s0, 0x101608 -s_atomic_add s20, s0, 0x10160c -s_atomic_add s20, s0, 0x101610 -s_atomic_add s20, s0, 0x101614 -s_atomic_add s20, s0, 0x101618 -s_atomic_add s20, s0, 0x10161c -s_atomic_add s20, s0, 0x101620 -s_atomic_add s20, s0, 0x101624 -s_atomic_add s20, s0, 0x101628 -s_atomic_add s20, s0, 0x10162c - -s_atomic_add s20, s0, 0x101700 -s_atomic_add s20, s0, 0x101704 -s_atomic_add s20, s0, 0x101708 -s_atomic_add s20, s0, 0x10170c -s_atomic_add s20, s0, 0x101710 -s_atomic_add s20, s0, 0x101714 -s_atomic_add s20, s0, 0x101718 -s_atomic_add s20, s0, 0x10171c -s_atomic_add s20, s0, 0x101720 -s_atomic_add s20, s0, 0x101724 -s_atomic_add s20, s0, 0x101728 -s_atomic_add s20, s0, 0x10172c - -s_atomic_add s20, s0, 0x101800 -s_atomic_add s20, s0, 0x101804 -s_atomic_add s20, s0, 0x101808 -s_atomic_add s20, s0, 0x10180c -s_atomic_add s20, s0, 0x101810 -s_atomic_add s20, s0, 0x101814 -s_atomic_add s20, s0, 0x101818 -s_atomic_add s20, s0, 0x10181c -s_atomic_add s20, s0, 0x101820 -s_atomic_add s20, s0, 0x101824 -s_atomic_add s20, s0, 0x101828 -s_atomic_add s20, s0, 0x10182c - -s_atomic_add s20, s0, 0x101900 -s_atomic_add s20, s0, 0x101904 -s_atomic_add s20, s0, 0x101908 -s_atomic_add s20, s0, 0x10190c -s_atomic_add s20, s0, 0x101910 -s_atomic_add s20, s0, 0x101914 -s_atomic_add s20, s0, 0x101918 -s_atomic_add s20, s0, 0x10191c -s_atomic_add s20, s0, 0x101920 -s_atomic_add s20, s0, 0x101924 -s_atomic_add s20, s0, 0x101928 -s_atomic_add s20, s0, 0x10192c - -s_atomic_add s20, s0, 0x101a00 -s_atomic_add s20, s0, 0x101a04 -s_atomic_add s20, s0, 0x101a08 -s_atomic_add s20, s0, 0x101a0c -s_atomic_add s20, s0, 0x101a10 -s_atomic_add s20, s0, 0x101a14 -s_atomic_add s20, s0, 0x101a18 -s_atomic_add s20, s0, 0x101a1c -s_atomic_add s20, s0, 0x101a20 -s_atomic_add s20, s0, 0x101a24 -s_atomic_add s20, s0, 0x101a28 -s_atomic_add s20, s0, 0x101a2c - -s_atomic_add s20, s0, 0x101b00 -s_atomic_add s20, s0, 0x101b04 -s_atomic_add s20, s0, 0x101b08 -s_atomic_add s20, s0, 0x101b0c -s_atomic_add s20, s0, 0x101b10 -s_atomic_add s20, s0, 0x101b14 -s_atomic_add s20, s0, 0x101b18 -s_atomic_add s20, s0, 0x101b1c -s_atomic_add s20, s0, 0x101b20 -s_atomic_add s20, s0, 0x101b24 -s_atomic_add s20, s0, 0x101b28 -s_atomic_add s20, s0, 0x101b2c - -s_atomic_add s20, s0, 0x101c00 -s_atomic_add s20, s0, 0x101c04 -s_atomic_add s20, s0, 0x101c08 -s_atomic_add s20, s0, 0x101c0c -s_atomic_add s20, s0, 0x101c10 -s_atomic_add s20, s0, 0x101c14 -s_atomic_add s20, s0, 0x101c18 -s_atomic_add s20, s0, 0x101c1c -s_atomic_add s20, s0, 0x101c20 -s_atomic_add s20, s0, 0x101c24 -s_atomic_add s20, s0, 0x101c28 -s_atomic_add s20, s0, 0x101c2c - -s_atomic_add s20, s0, 0x100000 -s_atomic_add s20, s0, 0x100010 -s_atomic_add s20, s0, 0x100020 -s_atomic_add s20, s0, 0x100030 -s_atomic_add s20, s0, 0x100040 -s_atomic_add s20, s0, 0x100050 -s_atomic_add s20, s0, 0x100060 -s_atomic_add s20, s0, 0x100070 -s_atomic_add s20, s0, 0x100080 -s_atomic_add s20, s0, 0x100090 - -s_atomic_add s20, s0, 0x100100 -s_atomic_add s20, s0, 0x100110 -s_atomic_add s20, s0, 0x100120 -s_atomic_add s20, s0, 0x100130 -s_atomic_add s20, s0, 0x100140 -s_atomic_add s20, s0, 0x100150 -s_atomic_add s20, s0, 0x100160 -s_atomic_add s20, s0, 0x100170 -s_atomic_add s20, s0, 0x100180 -s_atomic_add s20, s0, 0x100190 - -s_atomic_add s20, s0, 0x100200 -s_atomic_add s20, s0, 0x100210 -s_atomic_add s20, s0, 0x100220 -s_atomic_add s20, s0, 0x100230 -s_atomic_add s20, s0, 0x100240 -s_atomic_add s20, s0, 0x100250 -s_atomic_add s20, s0, 0x100260 -s_atomic_add s20, s0, 0x100270 -s_atomic_add s20, s0, 0x100280 -s_atomic_add s20, s0, 0x100290 - -s_atomic_add s20, s0, 0x100300 -s_atomic_add s20, s0, 0x100310 -s_atomic_add s20, s0, 0x100320 -s_atomic_add s20, s0, 0x100330 -s_atomic_add s20, s0, 0x100340 -s_atomic_add s20, s0, 0x100350 -s_atomic_add s20, s0, 0x100360 -s_atomic_add s20, s0, 0x100370 -s_atomic_add s20, s0, 0x100380 -s_atomic_add s20, s0, 0x100390 - -s_atomic_add s20, s0, 0x100400 -s_atomic_add s20, s0, 0x100404 -s_atomic_add s20, s0, 0x100408 -s_atomic_add s20, s0, 0x10040c -s_atomic_add s20, s0, 0x100410 -s_atomic_add s20, s0, 0x100414 -s_atomic_add s20, s0, 0x100418 -s_atomic_add s20, s0, 0x10041c -s_atomic_add s20, s0, 0x100420 -s_atomic_add s20, s0, 0x100424 -s_atomic_add s20, s0, 0x100428 -s_atomic_add s20, s0, 0x10042c - -s_atomic_add s20, s0, 0x100500 -s_atomic_add s20, s0, 0x100504 -s_atomic_add s20, s0, 0x100508 -s_atomic_add s20, s0, 0x10050c -s_atomic_add s20, s0, 0x100510 -s_atomic_add s20, s0, 0x100514 -s_atomic_add s20, s0, 0x100518 -s_atomic_add s20, s0, 0x10051c -s_atomic_add s20, s0, 0x100520 -s_atomic_add s20, s0, 0x100524 -s_atomic_add s20, s0, 0x100528 -s_atomic_add s20, s0, 0x10052c - -s_atomic_add s20, s0, 0x100600 -s_atomic_add s20, s0, 0x100604 -s_atomic_add s20, s0, 0x100608 -s_atomic_add s20, s0, 0x10060c -s_atomic_add s20, s0, 0x100610 -s_atomic_add s20, s0, 0x100614 -s_atomic_add s20, s0, 0x100618 -s_atomic_add s20, s0, 0x10061c -s_atomic_add s20, s0, 0x100620 -s_atomic_add s20, s0, 0x100624 -s_atomic_add s20, s0, 0x100628 -s_atomic_add s20, s0, 0x10062c - -s_atomic_add s20, s0, 0x100700 -s_atomic_add s20, s0, 0x100704 -s_atomic_add s20, s0, 0x100708 -s_atomic_add s20, s0, 0x10070c -s_atomic_add s20, s0, 0x100710 -s_atomic_add s20, s0, 0x100714 -s_atomic_add s20, s0, 0x100718 -s_atomic_add s20, s0, 0x10071c -s_atomic_add s20, s0, 0x100720 -s_atomic_add s20, s0, 0x100724 -s_atomic_add s20, s0, 0x100728 -s_atomic_add s20, s0, 0x10072c - -s_atomic_add s20, s0, 0x100800 -s_atomic_add s20, s0, 0x100804 -s_atomic_add s20, s0, 0x100808 -s_atomic_add s20, s0, 0x10080c -s_atomic_add s20, s0, 0x100810 -s_atomic_add s20, s0, 0x100814 -s_atomic_add s20, s0, 0x100818 -s_atomic_add s20, s0, 0x10081c -s_atomic_add s20, s0, 0x100820 -s_atomic_add s20, s0, 0x100824 -s_atomic_add s20, s0, 0x100828 -s_atomic_add s20, s0, 0x10082c - -s_atomic_add s20, s0, 0x100900 -s_atomic_add s20, s0, 0x100904 -s_atomic_add s20, s0, 0x100908 -s_atomic_add s20, s0, 0x10090c -s_atomic_add s20, s0, 0x100910 -s_atomic_add s20, s0, 0x100914 -s_atomic_add s20, s0, 0x100918 -s_atomic_add s20, s0, 0x10091c -s_atomic_add s20, s0, 0x100920 -s_atomic_add s20, s0, 0x100924 -s_atomic_add s20, s0, 0x100928 -s_atomic_add s20, s0, 0x10092c - -s_atomic_add s20, s0, 0x100a00 -s_atomic_add s20, s0, 0x100a04 -s_atomic_add s20, s0, 0x100a08 -s_atomic_add s20, s0, 0x100a0c -s_atomic_add s20, s0, 0x100a10 -s_atomic_add s20, s0, 0x100a14 -s_atomic_add s20, s0, 0x100a18 -s_atomic_add s20, s0, 0x100a1c -s_atomic_add s20, s0, 0x100a20 -s_atomic_add s20, s0, 0x100a24 -s_atomic_add s20, s0, 0x100a28 -s_atomic_add s20, s0, 0x100a2c - -s_atomic_add s20, s0, 0x100b00 -s_atomic_add s20, s0, 0x100b04 -s_atomic_add s20, s0, 0x100b08 -s_atomic_add s20, s0, 0x100b0c -s_atomic_add s20, s0, 0x100b10 -s_atomic_add s20, s0, 0x100b14 -s_atomic_add s20, s0, 0x100b18 -s_atomic_add s20, s0, 0x100b1c -s_atomic_add s20, s0, 0x100b20 -s_atomic_add s20, s0, 0x100b24 -s_atomic_add s20, s0, 0x100b28 -s_atomic_add s20, s0, 0x100b2c - -s_atomic_add s20, s0, 0x100c00 -s_atomic_add s20, s0, 0x100c04 -s_atomic_add s20, s0, 0x100c08 -s_atomic_add s20, s0, 0x100c0c -s_atomic_add s20, s0, 0x100c10 -s_atomic_add s20, s0, 0x100c14 -s_atomic_add s20, s0, 0x100c18 -s_atomic_add s20, s0, 0x100c1c -s_atomic_add s20, s0, 0x100c20 -s_atomic_add s20, s0, 0x100c24 -s_atomic_add s20, s0, 0x100c28 -s_atomic_add s20, s0, 0x100c2c - -s_atomic_add s20, s0, 0x101000 -s_atomic_add s20, s0, 0x101010 -s_atomic_add s20, s0, 0x101020 -s_atomic_add s20, s0, 0x101030 -s_atomic_add s20, s0, 0x101040 -s_atomic_add s20, s0, 0x101050 -s_atomic_add s20, s0, 0x101060 -s_atomic_add s20, s0, 0x101070 -s_atomic_add s20, s0, 0x101080 -s_atomic_add s20, s0, 0x101090 - -s_atomic_add s20, s0, 0x101100 -s_atomic_add s20, s0, 0x101110 -s_atomic_add s20, s0, 0x101120 -s_atomic_add s20, s0, 0x101130 -s_atomic_add s20, s0, 0x101140 -s_atomic_add s20, s0, 0x101150 -s_atomic_add s20, s0, 0x101160 -s_atomic_add s20, s0, 0x101170 -s_atomic_add s20, s0, 0x101180 -s_atomic_add s20, s0, 0x101190 - -s_atomic_add s20, s0, 0x101200 -s_atomic_add s20, s0, 0x101210 -s_atomic_add s20, s0, 0x101220 -s_atomic_add s20, s0, 0x101230 -s_atomic_add s20, s0, 0x101240 -s_atomic_add s20, s0, 0x101250 -s_atomic_add s20, s0, 0x101260 -s_atomic_add s20, s0, 0x101270 -s_atomic_add s20, s0, 0x101280 -s_atomic_add s20, s0, 0x101290 - -s_atomic_add s20, s0, 0x101300 -s_atomic_add s20, s0, 0x101310 -s_atomic_add s20, s0, 0x101320 -s_atomic_add s20, s0, 0x101330 -s_atomic_add s20, s0, 0x101340 -s_atomic_add s20, s0, 0x101350 -s_atomic_add s20, s0, 0x101360 -s_atomic_add s20, s0, 0x101370 -s_atomic_add s20, s0, 0x101380 -s_atomic_add s20, s0, 0x101390 - -s_atomic_add s20, s0, 0x101400 -s_atomic_add s20, s0, 0x101404 -s_atomic_add s20, s0, 0x101408 -s_atomic_add s20, s0, 0x10140c -s_atomic_add s20, s0, 0x101410 -s_atomic_add s20, s0, 0x101414 -s_atomic_add s20, s0, 0x101418 -s_atomic_add s20, s0, 0x10141c -s_atomic_add s20, s0, 0x101420 -s_atomic_add s20, s0, 0x101424 -s_atomic_add s20, s0, 0x101428 -s_atomic_add s20, s0, 0x10142c - -s_atomic_add s20, s0, 0x101500 -s_atomic_add s20, s0, 0x101504 -s_atomic_add s20, s0, 0x101508 -s_atomic_add s20, s0, 0x10150c -s_atomic_add s20, s0, 0x101510 -s_atomic_add s20, s0, 0x101514 -s_atomic_add s20, s0, 0x101518 -s_atomic_add s20, s0, 0x10151c -s_atomic_add s20, s0, 0x101520 -s_atomic_add s20, s0, 0x101524 -s_atomic_add s20, s0, 0x101528 -s_atomic_add s20, s0, 0x10152c - -s_atomic_add s20, s0, 0x101600 -s_atomic_add s20, s0, 0x101604 -s_atomic_add s20, s0, 0x101608 -s_atomic_add s20, s0, 0x10160c -s_atomic_add s20, s0, 0x101610 -s_atomic_add s20, s0, 0x101614 -s_atomic_add s20, s0, 0x101618 -s_atomic_add s20, s0, 0x10161c -s_atomic_add s20, s0, 0x101620 -s_atomic_add s20, s0, 0x101624 -s_atomic_add s20, s0, 0x101628 -s_atomic_add s20, s0, 0x10162c - -s_atomic_add s20, s0, 0x101700 -s_atomic_add s20, s0, 0x101704 -s_atomic_add s20, s0, 0x101708 -s_atomic_add s20, s0, 0x10170c -s_atomic_add s20, s0, 0x101710 -s_atomic_add s20, s0, 0x101714 -s_atomic_add s20, s0, 0x101718 -s_atomic_add s20, s0, 0x10171c -s_atomic_add s20, s0, 0x101720 -s_atomic_add s20, s0, 0x101724 -s_atomic_add s20, s0, 0x101728 -s_atomic_add s20, s0, 0x10172c - -s_atomic_add s20, s0, 0x101800 -s_atomic_add s20, s0, 0x101804 -s_atomic_add s20, s0, 0x101808 -s_atomic_add s20, s0, 0x10180c -s_atomic_add s20, s0, 0x101810 -s_atomic_add s20, s0, 0x101814 -s_atomic_add s20, s0, 0x101818 -s_atomic_add s20, s0, 0x10181c -s_atomic_add s20, s0, 0x101820 -s_atomic_add s20, s0, 0x101824 -s_atomic_add s20, s0, 0x101828 -s_atomic_add s20, s0, 0x10182c - -s_atomic_add s20, s0, 0x101900 -s_atomic_add s20, s0, 0x101904 -s_atomic_add s20, s0, 0x101908 -s_atomic_add s20, s0, 0x10190c -s_atomic_add s20, s0, 0x101910 -s_atomic_add s20, s0, 0x101914 -s_atomic_add s20, s0, 0x101918 -s_atomic_add s20, s0, 0x10191c -s_atomic_add s20, s0, 0x101920 -s_atomic_add s20, s0, 0x101924 -s_atomic_add s20, s0, 0x101928 -s_atomic_add s20, s0, 0x10192c - -s_atomic_add s20, s0, 0x101a00 -s_atomic_add s20, s0, 0x101a04 -s_atomic_add s20, s0, 0x101a08 -s_atomic_add s20, s0, 0x101a0c -s_atomic_add s20, s0, 0x101a10 -s_atomic_add s20, s0, 0x101a14 -s_atomic_add s20, s0, 0x101a18 -s_atomic_add s20, s0, 0x101a1c -s_atomic_add s20, s0, 0x101a20 -s_atomic_add s20, s0, 0x101a24 -s_atomic_add s20, s0, 0x101a28 -s_atomic_add s20, s0, 0x101a2c - -s_atomic_add s20, s0, 0x101b00 -s_atomic_add s20, s0, 0x101b04 -s_atomic_add s20, s0, 0x101b08 -s_atomic_add s20, s0, 0x101b0c -s_atomic_add s20, s0, 0x101b10 -s_atomic_add s20, s0, 0x101b14 -s_atomic_add s20, s0, 0x101b18 -s_atomic_add s20, s0, 0x101b1c -s_atomic_add s20, s0, 0x101b20 -s_atomic_add s20, s0, 0x101b24 -s_atomic_add s20, s0, 0x101b28 -s_atomic_add s20, s0, 0x101b2c - -s_atomic_add s20, s0, 0x101c00 -s_atomic_add s20, s0, 0x101c04 -s_atomic_add s20, s0, 0x101c08 -s_atomic_add s20, s0, 0x101c0c -s_atomic_add s20, s0, 0x101c10 -s_atomic_add s20, s0, 0x101c14 -s_atomic_add s20, s0, 0x101c18 -s_atomic_add s20, s0, 0x101c1c -s_atomic_add s20, s0, 0x101c20 -s_atomic_add s20, s0, 0x101c24 -s_atomic_add s20, s0, 0x101c28 -s_atomic_add s20, s0, 0x101c2c - - -s_sub_u32 s30, s30, 1 -s_cmpk_eq_u32 s30, 0 -s_cbranch_scc0 LOAD_LOOP - - -s_cmpk_ge_i32 s9, 0x2 -s_cbranch_scc1 END - -ATOMIC_LOOP: - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - - -s_sub_u32 s30, s30, 1 -s_cmpk_eq_u32 s30, 0 -s_cbranch_scc0 ATOMIC_LOOP - -//s_waitcnt 0 - -END: -s_waitcnt 0 - -s_mov_b32 s16, 0xa5a50000 -s_store_dword s16, s[0:1], 0x40 glc - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_tcc_return_control.sp3 b/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_tcc_return_control.sp3 deleted file mode 100644 index 99be574f61..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_tcc_return_control.sp3 +++ /dev/null @@ -1,1019 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -//set bit 12 low to select EA0 -s_mov_b32 s32, 0xFFFFEFFF -s_and_b32 s24, s24, s32 - -s_and_b32 s31, s9, 0x1 -s_cmpk_eq_i32 s31, 0x1 -s_cbranch_scc1 ODD_WAVES - -//set bit 12 high to select EA1 -s_mov_b32 s32, 0x1000 -s_or_b32 s24, s24, s32 - -ODD_WAVES: - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -v_mul_i32_i24 v3, v3, 0x10 -v_mul_i32_i24 v9, v9, 0x10 - -s_mov_b32 s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 - -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 - -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 - -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 -buffer_store_dwordx4 v0, v9, s24, s31 offen:1 -s_add_i32 s31, s31, 0x6000 - -s_barrier - -s_mov_b32 s31, 0xF0000 -s_mov_b32 s32, 0x6000 - -//store and load s8 times -s_mov_b32 s30, s8 - -//waves with the least 2 tgid (i.e. tgid0 and tgid1) will do atomic operations -//while the rest of waves will do load operations -s_cmpk_lt_i32 s9, 0x2 -s_cbranch_scc1 ATOMIC_LOOP - -LOAD_LOOP: - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -buffer_load_dwordx4 v4, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v8, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v12, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v16, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v20, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v24, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v28, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v32, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v36, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 -buffer_load_dwordx4 v40, v3, s24, s31 offen:1 -s_add_i32 s31, s31, 0x4000 - -s_sub_u32 s30, s30, 1 -s_cmpk_eq_u32 s30, 0 -s_cbranch_scc0 LOAD_LOOP - - -s_cmpk_ge_i32 s9, 0x2 -s_cbranch_scc1 END - -ATOMIC_LOOP: - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 -buffer_atomic_add_x2 v0, v3, s24, s32 offen:1 glc:1 -s_add_i32 s32, s32, 0x2000 - -s_sub_u32 s30, s30, 1 -s_cmpk_eq_u32 s30, 0 -s_cbranch_scc0 ATOMIC_LOOP - -//s_waitcnt 0 - -END: -s_waitcnt 0 - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_tcp_atcl1.sp3 b/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_tcp_atcl1.sp3 deleted file mode 100644 index 93d27c8d80..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_tcp_atcl1.sp3 +++ /dev/null @@ -1,80 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_load_dwordx4 s[20:23], s[0:1], 16 // load atc mem surface rsrc -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -var MTYPE_UC = 0x38000000 -s_or_b32 s27, s27, MTYPE_UC - - -//store and load s8 times -s_mov_b32 s8, 33 // store 33 times to overflow atcl1 cache... -s_mov_b32 s30, s8 -s_mov_b32 s31, 0x0 - -STORE_LOOP: -v_add_co_u32 v0, vcc[0:1], v0, 2 -buffer_store_dword v0, v9, s20, s31 idxen:1 glc:1 slc:1 -s_waitcnt 0 -s_add_u32 s31, s31, 4*1024 // step one 4KB page size -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 STORE_LOOP - - - -var DEBUG_FUNCTION = 0 -//remove code to half shader run time -if DEBUG_FUNCTION -s_mov_b32 s8, s30 -s_mov_b32 s31, 0x0 - -LOAD_LOOP: -buffer_load_dword v0, v9, s20, s31 idxen:1 glc:1 slc:1 -s_waitcnt 0 -v_mov_b32 v12, v0 -s_add_u32 s31, s31, 4*1024 -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 LOAD_LOOP -end - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_tcp_utcl1_fifo1.sp3 b/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_tcp_utcl1_fifo1.sp3 deleted file mode 100644 index ac9744f68c..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_tcp_utcl1_fifo1.sp3 +++ /dev/null @@ -1,80 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_load_dwordx4 s[20:23], s[0:1], 16 // load atc mem surface rsrc -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -var MTYPE_UC = 0x38000000 -s_or_b32 s27, s27, MTYPE_UC - - -//store and load s8 times -s_mov_b32 s8, 33 // store 33 times to overflow atcl1 cache... -s_mov_b32 s30, s8 -s_mov_b32 s31, 0x0 - -STORE_LOOP: -v_add_co_u32 v0, vcc[0:1], v0, 2 -buffer_store_dword v0, v9, s20, s31 idxen:1 glc:1 slc:1 -s_waitcnt 0 -s_add_u32 s31, s31, 4*1024 // step one 4KB page size -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 STORE_LOOP - - - -var DEBUG_FUNCTION = 1 -//remove code to half shader run time -if DEBUG_FUNCTION -s_mov_b32 s8, 0x20 -s_mov_b32 s31, 0xffc - -LOAD_LOOP: -buffer_load_dwordx2 v[0:1], v9, s20, s31 idxen:1 glc:1 slc:1 -s_waitcnt 0 -v_mov_b32 v12, v0 -s_add_u32 s31, s31, 4*1024 -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 LOAD_LOOP -end - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_utc_vmwalker_pde_02.sp3 b/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_utc_vmwalker_pde_02.sp3 deleted file mode 100644 index 9c66b37275..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_utc_vmwalker_pde_02.sp3 +++ /dev/null @@ -1,72 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 -//bump up the addresses being accessed to generate multiple reads to the pde memories -v_mul_u32_u24 v9, 65536, v9 -//store and load s8 times -s_mov_b32 s30, s8 -s_mov_b32 s31, 0x0 - -//Hack number of records to avoid range checking which we don't want since we want to generate -//out of range accesses. we are really trying to generate many reads to the PDEs to get FUE. -s_mov_b32 s26, 0xffffffff - -STORE_LOOP: -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_waitcnt 0 -s_add_u32 s31, s31, 0x4 -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 STORE_LOOP - -s_mov_b32 s8, s30 -s_mov_b32 s31, 0x0 - -LOAD_LOOP: -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_waitcnt 0 -v_mov_b32 v12, v0 -s_add_u32 s31, s31, 0x4 -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 LOAD_LOOP - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_utc_vmwalker_pde_03.sp3 b/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_utc_vmwalker_pde_03.sp3 deleted file mode 100644 index d4d53d09a0..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_utc_vmwalker_pde_03.sp3 +++ /dev/null @@ -1,72 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 -//bump up the addresses being accessed to generate multiple reads to the pde memories -v_mul_u32_u24 v9, 4096, v9 -//store and load s8 times -s_mov_b32 s30, s8 -s_mov_b32 s31, 0x0 - -//Hack number of records to avoid range checking which we don't want since we want to generate -//out of range accesses. we are really trying to generate many reads to the PDEs to get FUE. -s_mov_b32 s26, 0xffffffff - -STORE_LOOP: -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_waitcnt 0 -s_add_u32 s31, s31, 0x4 -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 STORE_LOOP - -s_mov_b32 s8, s30 -s_mov_b32 s31, 0x0 - -LOAD_LOOP: -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:0 -s_waitcnt 0 -v_mov_b32 v12, v0 -s_add_u32 s31, s31, 0x4 -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 LOAD_LOOP - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_vgpr.sp3 b/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_vgpr.sp3 deleted file mode 100644 index 138e13eded..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_vgpr.sp3 +++ /dev/null @@ -1,47 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(8) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset - -tgid_x_en(1) //s_tgid_x s8 -tgid_y_en(1) //s_tgid_y s9 -tgid_z_en(1) //s_tgid_z s10 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -//read mem data -s_mov_b32 s31, 0x0 -buffer_load_dword v0, v9, s24, s31 idxen:1 glc:1 -s_waitcnt 0 - -s_endpgm -end - diff --git a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_vgpr_01.sp3 b/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_vgpr_01.sp3 deleted file mode 100644 index 6d3ed3f9db..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_vgpr_01.sp3 +++ /dev/null @@ -1,54 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -//fetch the buffer resource through SQC -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -// v3 thread_id_in_group = (tid_z *x*y) + (tid_y*x) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 -v_mad_u32_u24 v3, v2, s3, v3 - -//s28 thread_group_id = (tgid_z*X*Y) + (tgid_y*X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 -s_add_i32 s28, s28, s_tgid_x -s_mul_i32 s29, s6, s_tgid_z -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id *(x*y*z) + thread_id_in_group -v_mov_b32 v9, s28 -v_mad_u32_u24 v9, v9, s4, v3 - -//store and load s8 times -s_mov_b32 s30, s8 -s_mov_b32 s31, 0x0 - -STORE_LOOP: -buffer_store_dword v0, v9, s24, s31 idxen:1 glc:0 -s_waitcnt 0 -s_add_u32 s31, s31, 0x4 -s_sub_u32 s8, s8, 1 -s_cmpk_eq_u32 s8, 0 -s_cbranch_scc0 STORE_LOOP - -s_endpgm -end diff --git a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_vgpr_02.sp3 b/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_vgpr_02.sp3 deleted file mode 100644 index 3228e2377d..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/vega20/gc_edc_vgpr_02.sp3 +++ /dev/null @@ -1,54 +0,0 @@ -shader main -type(CS) - -user_sgpr_count(9) // 2 for the buffer resource + 5 for thread/thread group parameters - -//s[0:1] the mmeory address for the buffer resource -//s2 x -//s3 x*y -//s4 x*y*z -//s5 X -//s6 X*Y -//s7 output offset -//s8 loop - -tgid_x_en(1) //s_tgid_x s9 -tgid_y_en(1) //s_tgid_y s10 -tgid_z_en(1) //s_tgid_z s11 - -//vo for tid_x -//v1 for tid_y -//v2 for tid_z - -s_mov_b32 s16, s2 - -//SPI may touch v0,v1,v2 before shader is run - -//store it 10 times -v_mov_b32 v10, v1 -v_mov_b32 v11, v2 -v_mov_b32 v12, v1 -v_mov_b32 v13, v2 -v_mov_b32 v14, v1 -v_mov_b32 v15, v2 -v_mov_b32 v16, v1 -v_mov_b32 v17, v2 -v_mov_b32 v18, v1 -v_mov_b32 v19, v0 - -// read them back -v_mov_b32 v29, v10 -v_mov_b32 v28, v11 -v_mov_b32 v27, v12 -v_mov_b32 v26, v13 -v_mov_b32 v25, v14 -v_mov_b32 v24, v15 -v_mov_b32 v23, v16 -v_mov_b32 v22, v17 -v_mov_b32 v21, v18 -v_mov_b32 v20, v19 - -s_store_dword s16, s[0:1], 0x0 glc - -s_endpgm -end diff --git a/ras_prebuild/sp3/gfx9/edc/vega20/gc_wf_lifetime_cs_pm4_01_cs.sp3 b/ras_prebuild/sp3/gfx9/edc/vega20/gc_wf_lifetime_cs_pm4_01_cs.sp3 deleted file mode 100644 index 9069afdb3e..0000000000 --- a/ras_prebuild/sp3/gfx9/edc/vega20/gc_wf_lifetime_cs_pm4_01_cs.sp3 +++ /dev/null @@ -1,58 +0,0 @@ -//s[0:1]: buffer resource -//s2: num_threads_x_full -//s3: num_threads_x_full * num_threads_y_full -//s4: num_threads_x_full * num_threads_y_full * num_threads_z_full -//s5: COMPUTE_DIM_X -//s6: COMPUTE_DIM_X * COMPUTE_DIM_Y -//s7: loop_lifetime -//s8: dispatch_offset -//s[9:11]: thread group ID -//v[0:2]: thread ID - -shader main - -type(CS) -user_sgpr_count(9) -tgid_x_en(1) -tgid_y_en(1) -tgid_z_en(1) - -//sp3 loop for lifetime -s_mov_b32 s12, 0 //init loop idx s12 -label_0004: -s_cmp_lt_i32 s12, s7 //scc = (s12 < s7) ? 1 : 0 -s_cbranch_scc0 label_0006 //if(scc == 0) then jump to label_0006; else nop - -v_mov_b32 v4,s12 -s_add_i32 s12, s12, 1 //add loop incr -s_branch label_0004 - -label_0006: //end of SP3 loop - -//v3 thread_id_in_group = (tid_z * num_threads_x_full * num_threads_y_full) + (tid_y * num_threads_x_full) + tid_x -v_mad_u32_u24 v3, v1, s2, v0 //v3 = tid_y * num_threads_x_full + tid_x -v_mad_u32_u24 v3, v2, s3, v3 //v3 = tid_z * num_threads_x_ful * num_threads_y_full + v3 - -//s28 thread_group_id = (tgid_z * COMPUTE_DIM_X * COMPUTE_DIM_Y) + (tgid_y * COMPUTE_DIM_X) + tgid_x -s_mul_i32 s28, s_tgid_y, s5 //tgid_y * COMPUTE_DIM_X -s_add_i32 s28, s28, s_tgid_x //tgid_y * COMPUTE_DIM_X + tgid_x -s_mul_i32 s29, s6, s_tgid_z //tgid_z * COMPUTE_DIM_X * COMPUTE_DIM_Y -s_add_i32 s28, s29, s28 - -//v9 absolute thread id = thread_group_id * (num_threads_x_full * num_threads_y_full * num_threads_z_full) + thread_id_in_group -v_mov_b32 v9, s28 //thread_group_id -v_mad_u32_u24 v9, v9, s4, v3 - -//fetch the buffer resource -s_load_dwordx4 s[24:27], s[0:1], 0x0 -s_waitcnt 0 - -//write absolute thread id using it as an index -buffer_store_dword v9, v9, s24, s8 idxen:1 -s_waitcnt 0 - -s_mov_b32 s16, 0xa5a50000 -s_store_dword s16, s[0:1], 0x40 glc - -s_endpgm -end diff --git a/rdc_libs/rdc/CMakeLists.txt b/rdc_libs/rdc/CMakeLists.txt index c20ebec7d1..7b75656948 100644 --- a/rdc_libs/rdc/CMakeLists.txt +++ b/rdc_libs/rdc/CMakeLists.txt @@ -18,7 +18,6 @@ set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${SRC_DIR}/RdcModuleMgrImpl.cc" "${SRC_DIR}/RdcNotificationImpl.cc" "${SRC_DIR}/RdcPerfTimer.cc" - "${SRC_DIR}/RdcRasLib.cc" "${SRC_DIR}/RdcRocpLib.cc" "${SRC_DIR}/RdcRocrLib.cc" "${SRC_DIR}/RdcRVSLib.cc" @@ -51,7 +50,6 @@ set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${INC_DIR}/impl/RdcMetricsUpdaterImpl.h" "${INC_DIR}/impl/RdcModuleMgrImpl.h" "${INC_DIR}/impl/RdcNotificationImpl.h" - "${INC_DIR}/impl/RdcRasLib.h" "${INC_DIR}/impl/RdcRocpLib.h" "${INC_DIR}/impl/RdcRocrLib.h" "${INC_DIR}/impl/RdcRVSLib.h" diff --git a/rdc_libs/rdc/src/RdcMetricFetcherImpl.cc b/rdc_libs/rdc/src/RdcMetricFetcherImpl.cc index cbb4d39d78..a0ff671de7 100644 --- a/rdc_libs/rdc/src/RdcMetricFetcherImpl.cc +++ b/rdc_libs/rdc/src/RdcMetricFetcherImpl.cc @@ -91,7 +91,116 @@ uint64_t RdcMetricFetcherImpl::now() { return static_cast(tv.tv_sec) * 1000 + tv.tv_usec / 1000; } -void RdcMetricFetcherImpl::get_ecc_error(uint32_t gpu_index, rdc_field_t field_id, +void RdcMetricFetcherImpl::get_ecc(uint32_t gpu_index, rdc_field_t field_id, + rdc_field_value* value) { + amdsmi_status_t err = AMDSMI_STATUS_SUCCESS; + amdsmi_ras_err_state_t err_state; + + amdsmi_processor_handle processor_handle; + err = get_processor_handle_from_id(gpu_index, &processor_handle); + + // because RDC already had an established order that is different from amd-smi : map blocks to + // fields manually + auto field_to_block_ = [](rdc_field_t field) -> amdsmi_gpu_block_t { + switch (field) { + case RDC_FI_ECC_SDMA_CE: + case RDC_FI_ECC_SDMA_UE: + return AMDSMI_GPU_BLOCK_SDMA; + case RDC_FI_ECC_GFX_CE: + case RDC_FI_ECC_GFX_UE: + return AMDSMI_GPU_BLOCK_GFX; + case RDC_FI_ECC_MMHUB_CE: + case RDC_FI_ECC_MMHUB_UE: + return AMDSMI_GPU_BLOCK_MMHUB; + case RDC_FI_ECC_ATHUB_CE: + case RDC_FI_ECC_ATHUB_UE: + return AMDSMI_GPU_BLOCK_ATHUB; + case RDC_FI_ECC_PCIE_BIF_CE: + case RDC_FI_ECC_PCIE_BIF_UE: + return AMDSMI_GPU_BLOCK_PCIE_BIF; + case RDC_FI_ECC_HDP_CE: + case RDC_FI_ECC_HDP_UE: + return AMDSMI_GPU_BLOCK_HDP; + case RDC_FI_ECC_XGMI_WAFL_CE: + case RDC_FI_ECC_XGMI_WAFL_UE: + return AMDSMI_GPU_BLOCK_XGMI_WAFL; + case RDC_FI_ECC_DF_CE: + case RDC_FI_ECC_DF_UE: + return AMDSMI_GPU_BLOCK_DF; + case RDC_FI_ECC_SMN_CE: + case RDC_FI_ECC_SMN_UE: + return AMDSMI_GPU_BLOCK_SMN; + case RDC_FI_ECC_SEM_CE: + case RDC_FI_ECC_SEM_UE: + return AMDSMI_GPU_BLOCK_SEM; + case RDC_FI_ECC_MP0_CE: + case RDC_FI_ECC_MP0_UE: + return AMDSMI_GPU_BLOCK_MP0; + case RDC_FI_ECC_MP1_CE: + case RDC_FI_ECC_MP1_UE: + return AMDSMI_GPU_BLOCK_MP1; + case RDC_FI_ECC_FUSE_CE: + case RDC_FI_ECC_FUSE_UE: + return AMDSMI_GPU_BLOCK_FUSE; + case RDC_FI_ECC_UMC_CE: + case RDC_FI_ECC_UMC_UE: + return AMDSMI_GPU_BLOCK_UMC; + case RDC_FI_ECC_MCA_CE: + case RDC_FI_ECC_MCA_UE: + return AMDSMI_GPU_BLOCK_MCA; + case RDC_FI_ECC_VCN_CE: + case RDC_FI_ECC_VCN_UE: + return AMDSMI_GPU_BLOCK_VCN; + case RDC_FI_ECC_JPEG_CE: + case RDC_FI_ECC_JPEG_UE: + return AMDSMI_GPU_BLOCK_JPEG; + case RDC_FI_ECC_IH_CE: + case RDC_FI_ECC_IH_UE: + return AMDSMI_GPU_BLOCK_IH; + case RDC_FI_ECC_MPIO_CE: + case RDC_FI_ECC_MPIO_UE: + return AMDSMI_GPU_BLOCK_MPIO; + default: + return AMDSMI_GPU_BLOCK_INVALID; + } + }; + + const bool is_correctable = (field_id % 2 == 0); + + if (!value) { + return; + } + + auto gpu_block = field_to_block_(field_id); + if (gpu_block == AMDSMI_GPU_BLOCK_INVALID) { + value->status = AMDSMI_STATUS_INPUT_OUT_OF_BOUNDS; + } + + err = amdsmi_get_gpu_ecc_status(processor_handle, gpu_block, &err_state); + if (err != AMDSMI_STATUS_SUCCESS) { + RDC_LOG(RDC_INFO, "Error in ecc status [" << gpu_block << "]:" << err); + value->status = err; + return; + } + + amdsmi_error_count_t ec; + err = amdsmi_get_gpu_ecc_count(processor_handle, gpu_block, &ec); + if (err != AMDSMI_STATUS_SUCCESS) { + RDC_LOG(RDC_ERROR, "Error in ecc count [" << gpu_block << "]:" << err); + value->status = err; + return; + } + + value->status = AMDSMI_STATUS_SUCCESS; + value->type = INTEGER; + if (is_correctable) { + value->value.l_int = ec.correctable_count; + } else { + value->value.l_int = ec.uncorrectable_count; + } +} + +void RdcMetricFetcherImpl::get_ecc_total(uint32_t gpu_index, rdc_field_t field_id, rdc_field_value* value) { amdsmi_status_t err = AMDSMI_STATUS_SUCCESS; uint64_t correctable_count = 0; @@ -516,7 +625,47 @@ rdc_status_t RdcMetricFetcherImpl::fetch_smi_field(uint32_t gpu_index, rdc_field } case RDC_FI_ECC_CORRECT_TOTAL: case RDC_FI_ECC_UNCORRECT_TOTAL: - get_ecc_error(gpu_index, field_id, value); + get_ecc_total(gpu_index, field_id, value); + break; + case RDC_FI_ECC_SDMA_CE: + case RDC_FI_ECC_SDMA_UE: + case RDC_FI_ECC_GFX_CE: + case RDC_FI_ECC_GFX_UE: + case RDC_FI_ECC_MMHUB_CE: + case RDC_FI_ECC_MMHUB_UE: + case RDC_FI_ECC_ATHUB_CE: + case RDC_FI_ECC_ATHUB_UE: + case RDC_FI_ECC_PCIE_BIF_CE: + case RDC_FI_ECC_PCIE_BIF_UE: + case RDC_FI_ECC_HDP_CE: + case RDC_FI_ECC_HDP_UE: + case RDC_FI_ECC_XGMI_WAFL_CE: + case RDC_FI_ECC_XGMI_WAFL_UE: + case RDC_FI_ECC_DF_CE: + case RDC_FI_ECC_DF_UE: + case RDC_FI_ECC_SMN_CE: + case RDC_FI_ECC_SMN_UE: + case RDC_FI_ECC_SEM_CE: + case RDC_FI_ECC_SEM_UE: + case RDC_FI_ECC_MP0_CE: + case RDC_FI_ECC_MP0_UE: + case RDC_FI_ECC_MP1_CE: + case RDC_FI_ECC_MP1_UE: + case RDC_FI_ECC_FUSE_CE: + case RDC_FI_ECC_FUSE_UE: + case RDC_FI_ECC_UMC_CE: + case RDC_FI_ECC_UMC_UE: + case RDC_FI_ECC_MCA_CE: + case RDC_FI_ECC_MCA_UE: + case RDC_FI_ECC_VCN_CE: + case RDC_FI_ECC_VCN_UE: + case RDC_FI_ECC_JPEG_CE: + case RDC_FI_ECC_JPEG_UE: + case RDC_FI_ECC_IH_CE: + case RDC_FI_ECC_IH_UE: + case RDC_FI_ECC_MPIO_CE: + case RDC_FI_ECC_MPIO_UE: + get_ecc(gpu_index, field_id, value); break; case RDC_FI_PCIE_TX: case RDC_FI_PCIE_RX: diff --git a/rdc_libs/rdc/src/RdcModuleMgrImpl.cc b/rdc_libs/rdc/src/RdcModuleMgrImpl.cc index 6a1cf84c7b..9019d2ab39 100644 --- a/rdc_libs/rdc/src/RdcModuleMgrImpl.cc +++ b/rdc_libs/rdc/src/RdcModuleMgrImpl.cc @@ -30,7 +30,6 @@ THE SOFTWARE. #include "rdc_lib/RdcTelemetry.h" #include "rdc_lib/impl/RdcDiagnosticModule.h" #include "rdc_lib/impl/RdcRVSLib.h" -#include "rdc_lib/impl/RdcRasLib.h" #include "rdc_lib/impl/RdcRocpLib.h" #include "rdc_lib/impl/RdcRocrLib.h" #include "rdc_lib/impl/RdcSmiLib.h" @@ -89,7 +88,7 @@ RdcModuleMgrImpl::RdcModuleMgrImpl(const RdcMetricFetcherPtr& fetcher) : fetcher } // all other modules get initialized by insert_modules - insert_modules(); + insert_modules(); } RdcTelemetryPtr RdcModuleMgrImpl::get_telemetry_module() { diff --git a/rdc_libs/rdc/src/RdcRasLib.cc b/rdc_libs/rdc/src/RdcRasLib.cc deleted file mode 100644 index ff1b2f54f6..0000000000 --- a/rdc_libs/rdc/src/RdcRasLib.cc +++ /dev/null @@ -1,178 +0,0 @@ -/* -Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ -#include "rdc_lib/impl/RdcRasLib.h" - -namespace amd { -namespace rdc { - -RdcRasLib::RdcRasLib() - : fields_value_get_(nullptr), - fields_query_(nullptr), - fields_watch_(nullptr), - fields_unwatch_(nullptr), - rdc_module_init_(nullptr), - rdc_module_destroy_(nullptr) { - rdc_status_t status = lib_loader_.load("librdc_ras.so"); - if (status != RDC_ST_OK) { - RDC_LOG(RDC_ERROR, "RAS related function will not work."); - return; - } - - status = lib_loader_.load_symbol(&rdc_module_init_, "rdc_module_init"); - if (status != RDC_ST_OK) { - rdc_module_init_ = nullptr; - return; - } - - status = rdc_module_init_(0); - if (status != RDC_ST_OK) { - RDC_LOG(RDC_ERROR, "Fail to init librdc_ras.so:" << rdc_status_string(status) - << ". RAS related function will not work."); - return; - } - - status = lib_loader_.load_symbol(&rdc_module_destroy_, "rdc_module_destroy"); - if (status != RDC_ST_OK) { - rdc_module_destroy_ = nullptr; - } - - status = lib_loader_.load_symbol(&fields_value_get_, "rdc_telemetry_fields_value_get"); - if (status != RDC_ST_OK) { - fields_value_get_ = nullptr; - } - status = lib_loader_.load_symbol(&fields_query_, "rdc_telemetry_fields_query"); - if (status != RDC_ST_OK) { - fields_query_ = nullptr; - } - status = lib_loader_.load_symbol(&fields_watch_, "rdc_telemetry_fields_watch"); - if (status != RDC_ST_OK) { - fields_watch_ = nullptr; - } - status = lib_loader_.load_symbol(&fields_unwatch_, "rdc_telemetry_fields_unwatch"); - if (status != RDC_ST_OK) { - fields_unwatch_ = nullptr; - } -} - -RdcRasLib::~RdcRasLib() { - if (rdc_module_destroy_) { - rdc_module_destroy_(); - } -} - -rdc_status_t RdcRasLib::rdc_telemetry_fields_query(uint32_t field_ids[MAX_NUM_FIELDS], - uint32_t* field_count) { - if (field_count == nullptr) { - return RDC_ST_BAD_PARAMETER; - } - if (!fields_query_) { - *field_count = 0; - return RDC_ST_FAIL_LOAD_MODULE; - } - - auto status = fields_query_(field_ids, field_count); - RDC_LOG(RDC_DEBUG, "RAS support " << *field_count << " fields"); - return status; -} - -rdc_status_t RdcRasLib::rdc_telemetry_fields_value_get(rdc_gpu_field_t* fields, - uint32_t fields_count, - rdc_field_value_f callback, - void* user_data) { - if (fields == nullptr) { - return RDC_ST_BAD_PARAMETER; - } - if (!fields_value_get_) { - return RDC_ST_FAIL_LOAD_MODULE; - } - rdc_status_t status = fields_value_get_(fields, fields_count, callback, user_data); - RDC_LOG(RDC_DEBUG, - "Bulk fetched " << fields_count << " fields from RAS: " << rdc_status_string(status)); - return status; -} - -rdc_status_t RdcRasLib::rdc_telemetry_fields_watch(rdc_gpu_field_t* fields, uint32_t fields_count) { - if (fields == nullptr) { - return RDC_ST_BAD_PARAMETER; - } - if (!fields_watch_) { - return RDC_ST_FAIL_LOAD_MODULE; - } - rdc_status_t status = fields_watch_(fields, fields_count); - RDC_LOG(RDC_DEBUG, "Watch " << fields_count << " fields from RAS: " << rdc_status_string(status)); - return status; -} - -rdc_status_t RdcRasLib::rdc_telemetry_fields_unwatch(rdc_gpu_field_t* fields, - uint32_t fields_count) { - if (fields == nullptr) { - return RDC_ST_BAD_PARAMETER; - } - if (!fields_unwatch_) { - return RDC_ST_FAIL_LOAD_MODULE; - } - rdc_status_t status = fields_unwatch_(fields, fields_count); - RDC_LOG(RDC_DEBUG, - "Unwatch " << fields_count << " fields from RAS: " << rdc_status_string(status)); - return status; -} - -rdc_status_t RdcRasLib::rdc_diag_test_cases_query(rdc_diag_test_cases_t test_cases[MAX_TEST_CASES], - uint32_t* test_case_count) { - (void)test_cases; - (void)test_case_count; - return RDC_ST_NOT_SUPPORTED; -} - -// Run a specific test case -rdc_status_t RdcRasLib::rdc_test_case_run(rdc_diag_test_cases_t test_case, - uint32_t gpu_index[RDC_MAX_NUM_DEVICES], - uint32_t gpu_count, const char* config, - size_t config_size, rdc_diag_test_result_t* result) { - (void)test_case; - (void)gpu_index; - (void)result; - (void)gpu_count; - (void)config; - (void)config_size; - return RDC_ST_NOT_SUPPORTED; -} - -rdc_status_t RdcRasLib::rdc_diagnostic_run(const rdc_group_info_t& gpus, rdc_diag_level_t level, - const char* config, size_t config_size, - rdc_diag_response_t* response) { - (void)gpus; - (void)level; - (void)config; - (void)config_size; - (void)response; - return RDC_ST_NOT_SUPPORTED; -} - -rdc_status_t RdcRasLib::rdc_diag_init(uint64_t flags) { - (void)flags; - return RDC_ST_NOT_SUPPORTED; -} -rdc_status_t RdcRasLib::rdc_diag_destroy() { return RDC_ST_NOT_SUPPORTED; } - -} // namespace rdc -} // namespace amd diff --git a/rdc_libs/rdc/src/RdcSmiLib.cc b/rdc_libs/rdc/src/RdcSmiLib.cc index 8ede43e270..87e4e4b3fe 100644 --- a/rdc_libs/rdc/src/RdcSmiLib.cc +++ b/rdc_libs/rdc/src/RdcSmiLib.cc @@ -148,28 +148,34 @@ rdc_status_t RdcSmiLib::rdc_telemetry_fields_query(uint32_t field_ids[MAX_NUM_FI // List of fields supported by amd_smi_lib const std::vector fields{ - RDC_FI_GPU_COUNT, RDC_FI_DEV_NAME, - RDC_FI_GPU_CLOCK, RDC_FI_MEM_CLOCK, - RDC_FI_MEMORY_TEMP, RDC_FI_GPU_TEMP, - RDC_FI_POWER_USAGE, RDC_FI_PCIE_TX, - RDC_FI_PCIE_RX, RDC_FI_PCIE_BANDWIDTH, - RDC_FI_GPU_UTIL, - RDC_FI_GPU_MEMORY_USAGE, RDC_FI_GPU_MEMORY_TOTAL, - RDC_FI_ECC_CORRECT_TOTAL, RDC_FI_ECC_UNCORRECT_TOTAL, - RDC_FI_XGMI_0_READ_KB, RDC_FI_XGMI_1_READ_KB, - RDC_FI_XGMI_2_READ_KB, RDC_FI_XGMI_3_READ_KB, - RDC_FI_XGMI_4_READ_KB, RDC_FI_XGMI_5_READ_KB, - RDC_FI_XGMI_6_READ_KB, RDC_FI_XGMI_7_READ_KB, - RDC_FI_XGMI_0_WRITE_KB, RDC_FI_XGMI_1_WRITE_KB, - RDC_FI_XGMI_2_WRITE_KB, RDC_FI_XGMI_3_WRITE_KB, - RDC_FI_XGMI_4_WRITE_KB, RDC_FI_XGMI_5_WRITE_KB, - RDC_FI_XGMI_6_WRITE_KB, RDC_FI_XGMI_7_WRITE_KB, - RDC_EVNT_XGMI_0_NOP_TX, RDC_EVNT_XGMI_0_REQ_TX, - RDC_EVNT_XGMI_0_RESP_TX, RDC_EVNT_XGMI_0_BEATS_TX, - RDC_EVNT_XGMI_1_NOP_TX, RDC_EVNT_XGMI_1_REQ_TX, - RDC_EVNT_XGMI_1_RESP_TX, RDC_EVNT_XGMI_1_BEATS_TX, - RDC_EVNT_XGMI_0_THRPUT, RDC_EVNT_XGMI_1_THRPUT, - RDC_EVNT_XGMI_2_THRPUT, RDC_EVNT_XGMI_3_THRPUT, + + RDC_FI_GPU_COUNT, RDC_FI_DEV_NAME, RDC_FI_GPU_CLOCK, + RDC_FI_MEM_CLOCK, RDC_FI_MEMORY_TEMP, RDC_FI_GPU_TEMP, + RDC_FI_POWER_USAGE, RDC_FI_PCIE_TX, RDC_FI_PCIE_RX, + RDC_FI_PCIE_BANDWIDTH, RDC_FI_GPU_UTIL, RDC_FI_GPU_MEMORY_USAGE, + RDC_FI_GPU_MEMORY_TOTAL, RDC_FI_ECC_CORRECT_TOTAL, RDC_FI_ECC_UNCORRECT_TOTAL, + RDC_FI_ECC_SDMA_CE, RDC_FI_ECC_SDMA_UE, RDC_FI_ECC_GFX_CE, + RDC_FI_ECC_GFX_UE, RDC_FI_ECC_MMHUB_CE, RDC_FI_ECC_MMHUB_UE, + RDC_FI_ECC_ATHUB_CE, RDC_FI_ECC_ATHUB_UE, RDC_FI_ECC_PCIE_BIF_CE, + RDC_FI_ECC_PCIE_BIF_UE, RDC_FI_ECC_HDP_CE, RDC_FI_ECC_HDP_UE, + RDC_FI_ECC_XGMI_WAFL_CE, RDC_FI_ECC_XGMI_WAFL_UE, RDC_FI_ECC_DF_CE, + RDC_FI_ECC_DF_UE, RDC_FI_ECC_SMN_CE, RDC_FI_ECC_SMN_UE, + RDC_FI_ECC_SEM_CE, RDC_FI_ECC_SEM_UE, RDC_FI_ECC_MP0_CE, + RDC_FI_ECC_MP0_UE, RDC_FI_ECC_MP1_CE, RDC_FI_ECC_MP1_UE, + RDC_FI_ECC_FUSE_CE, RDC_FI_ECC_FUSE_UE, RDC_FI_ECC_UMC_CE, + RDC_FI_ECC_UMC_UE, RDC_FI_ECC_MCA_CE, RDC_FI_ECC_MCA_UE, + RDC_FI_ECC_VCN_CE, RDC_FI_ECC_VCN_UE, RDC_FI_ECC_JPEG_CE, + RDC_FI_ECC_JPEG_UE, RDC_FI_ECC_IH_CE, RDC_FI_ECC_IH_UE, + RDC_FI_ECC_MPIO_CE, RDC_FI_ECC_MPIO_UE, RDC_FI_XGMI_0_READ_KB, + RDC_FI_XGMI_1_READ_KB, RDC_FI_XGMI_2_READ_KB, RDC_FI_XGMI_3_READ_KB, + RDC_FI_XGMI_4_READ_KB, RDC_FI_XGMI_5_READ_KB, RDC_FI_XGMI_6_READ_KB, + RDC_FI_XGMI_7_READ_KB, RDC_FI_XGMI_0_WRITE_KB, RDC_FI_XGMI_1_WRITE_KB, + RDC_FI_XGMI_2_WRITE_KB, RDC_FI_XGMI_3_WRITE_KB, RDC_FI_XGMI_4_WRITE_KB, + RDC_FI_XGMI_5_WRITE_KB, RDC_FI_XGMI_6_WRITE_KB, RDC_FI_XGMI_7_WRITE_KB, + RDC_EVNT_XGMI_0_NOP_TX, RDC_EVNT_XGMI_0_REQ_TX, RDC_EVNT_XGMI_0_RESP_TX, + RDC_EVNT_XGMI_0_BEATS_TX, RDC_EVNT_XGMI_1_NOP_TX, RDC_EVNT_XGMI_1_REQ_TX, + RDC_EVNT_XGMI_1_RESP_TX, RDC_EVNT_XGMI_1_BEATS_TX, RDC_EVNT_XGMI_0_THRPUT, + RDC_EVNT_XGMI_1_THRPUT, RDC_EVNT_XGMI_2_THRPUT, RDC_EVNT_XGMI_3_THRPUT, RDC_EVNT_XGMI_4_THRPUT, RDC_EVNT_XGMI_5_THRPUT, }; std::copy(fields.begin(), fields.end(), field_ids); diff --git a/rdc_libs/rdc/src/RdcTelemetryModule.cc b/rdc_libs/rdc/src/RdcTelemetryModule.cc index e0f4e31450..672de32bec 100644 --- a/rdc_libs/rdc/src/RdcTelemetryModule.cc +++ b/rdc_libs/rdc/src/RdcTelemetryModule.cc @@ -24,8 +24,8 @@ THE SOFTWARE. #include #include "rdc_lib/RdcException.h" +#include "rdc_lib/RdcLogger.h" #include "rdc_lib/RdcMetricFetcher.h" -#include "rdc_lib/impl/RdcRasLib.h" #include "rdc_lib/impl/RdcSmiLib.h" namespace amd { diff --git a/tests/rdc_tests/CMakeLists.txt b/tests/rdc_tests/CMakeLists.txt index 59d2c5d618..7795d15b0e 100755 --- a/tests/rdc_tests/CMakeLists.txt +++ b/tests/rdc_tests/CMakeLists.txt @@ -76,7 +76,6 @@ target_include_directories( target_link_libraries(${RDCTST} PUBLIC rdc_bootstrap PUBLIC rdc - PUBLIC rdc_ras PUBLIC GTest::gtest_main PUBLIC c PUBLIC stdc++