From 9df2c1ec68009953a905044b8804a3d1cde4a542 Mon Sep 17 00:00:00 2001 From: "Jonathan R. Madsen" Date: Thu, 14 Aug 2025 00:02:23 -0500 Subject: [PATCH] [rocprofiler-sdk] Fix formatting, linting, and CI workflows (#345) * [rocprofiler-sdk] Fix formatting and lint workflows - several formatting workflows were silently failing when listing files * format metrics_test.h * Improve formatting job robustness * Source formatting workflow does not use container * Use PyPi clang-format * Format rocpd/source/csv.cpp source * Fix rocprofiler-sdk CI workflow - fix invalid context access * Update run-ci.py - fix ctest_update * Update run-ci.py - handle old checkout in ROCm/rocprofiler-sdk --- ...rocprofiler-sdk-continuous_integration.yml | 31 +- .../workflows/rocprofiler-sdk-formatting.yml | 51 +++- .github/workflows/rocprofiler-sdk-python.yml | 4 +- .../source/lib/python/rocpd/source/csv.cpp | 2 +- .../counters/tests/metrics_test.h | 285 ++++-------------- .../rocprofiler-sdk/source/scripts/run-ci.py | 7 +- 6 files changed, 111 insertions(+), 269 deletions(-) diff --git a/.github/workflows/rocprofiler-sdk-continuous_integration.yml b/.github/workflows/rocprofiler-sdk-continuous_integration.yml index 334a4c813e..6d914663c2 100644 --- a/.github/workflows/rocprofiler-sdk-continuous_integration.yml +++ b/.github/workflows/rocprofiler-sdk-continuous_integration.yml @@ -101,7 +101,7 @@ jobs: ls -la - name: Enable PC Sampling - if: ${{ contains(matrix.system.gpu, 'mi200') || contains(matrix.system.gpu, 'mi300a') }} + if: ${{ contains(matrix.runner, 'mi200') || contains(matrix.runner, 'mi300a') }} shell: bash working-directory: projects/rocprofiler-sdk run: @@ -113,41 +113,40 @@ jobs: working-directory: projects/rocprofiler-sdk run: python3 ./source/scripts/run-ci.py -B build - --name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.system.os }}-${{ matrix.system.gpu }}-core + --name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}-${{ matrix.runner }}-core --build-jobs 16 --site ${RUNNER_HOSTNAME} --gpu-targets ${{ env.GPU_TARGETS }} --run-attempt ${{ github.run_attempt }} - ${{ matrix.system.ci-flags }} -- -DROCPROFILER_DEP_ROCMCORE=ON -DROCPROFILER_BUILD_DOCS=OFF - -DCMAKE_BUILD_TYPE=${{ matrix.system.build-type }} + -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} -DCMAKE_INSTALL_PREFIX=/opt/rocprofiler-sdk -DCPACK_GENERATOR='DEB;RPM;TGZ' -DCPACK_PACKAGING_INSTALL_PREFIX="$(realpath /opt/rocm)" -DPython3_EXECUTABLE=$(which python3) ${{ env.GLOBAL_CMAKE_OPTIONS }} -- - -LE "${${{ matrix.system.gpu }}_EXCLUDE_LABEL_REGEX}" - -E "${${{ matrix.system.gpu }}_EXCLUDE_TESTS_REGEX}" + -LE "${${{ matrix.runner }}_EXCLUDE_LABEL_REGEX}" + -E "${${{ matrix.runner }}_EXCLUDE_TESTS_REGEX}" - name: Install - if: ${{ contains(matrix.system.gpu, env.CORE_EXT_RUNNER) }} + if: ${{ contains(matrix.runner, env.CORE_EXT_RUNNER) }} timeout-minutes: 10 working-directory: projects/rocprofiler-sdk run: cmake --build build --target install --parallel 16 - name: Build Packaging - if: ${{ contains(matrix.system.gpu, env.CORE_EXT_RUNNER) }} + if: ${{ contains(matrix.runner, env.CORE_EXT_RUNNER) }} timeout-minutes: 10 working-directory: projects/rocprofiler-sdk run: cmake --build build --target package --parallel 16 - name: Test Install Build - if: ${{ contains(matrix.system.gpu, env.CORE_EXT_RUNNER) }} + if: ${{ contains(matrix.runner, env.CORE_EXT_RUNNER) }} timeout-minutes: 20 shell: bash working-directory: projects/rocprofiler-sdk @@ -157,11 +156,11 @@ jobs: export LD_LIBRARY_PATH=/opt/rocprofiler-sdk/lib:${LD_LIBRARY_PATH} cmake --build build-samples --target all --parallel 16 cmake --build build-tests --target all --parallel 16 - ctest --test-dir build-samples -LE "${${{ matrix.system.gpu }}_EXCLUDE_LABEL_REGEX}" -E "${${{ matrix.system.gpu }}_EXCLUDE_TESTS_REGEX}" --output-on-failure - ctest --test-dir build-tests -LE "${${{ matrix.system.gpu }}_EXCLUDE_LABEL_REGEX}" -E "${${{ matrix.system.gpu }}_EXCLUDE_TESTS_REGEX}" --output-on-failure + ctest --test-dir build-samples -LE "${${{ matrix.runner }}_EXCLUDE_LABEL_REGEX}" -E "${${{ matrix.runner }}_EXCLUDE_TESTS_REGEX}" --output-on-failure + ctest --test-dir build-tests -LE "${${{ matrix.runner }}_EXCLUDE_LABEL_REGEX}" -E "${${{ matrix.runner }}_EXCLUDE_TESTS_REGEX}" --output-on-failure - name: Install Packages - if: ${{ contains(matrix.system.gpu, env.CORE_EXT_RUNNER) }} + if: ${{ contains(matrix.runner, env.CORE_EXT_RUNNER) }} timeout-minutes: 5 shell: bash working-directory: projects/rocprofiler-sdk @@ -174,7 +173,7 @@ jobs: for i in $(ls -S ./build/rocprofiler-sdk*.deb | egrep -v 'roctx|rocpd'); do dpkg --force-all -i ${i}; done; - name: Test Installed Packages - if: ${{ contains(matrix.system.gpu, env.CORE_EXT_RUNNER) }} + if: ${{ contains(matrix.runner, env.CORE_EXT_RUNNER) }} timeout-minutes: 20 shell: bash working-directory: projects/rocprofiler-sdk @@ -183,11 +182,11 @@ jobs: CMAKE_PREFIX_PATH=/opt/rocm cmake -B build-tests-deb -DGPU_TARGETS="gfx942" /opt/rocm/share/rocprofiler-sdk/tests cmake --build build-samples-deb --target all --parallel 16 cmake --build build-tests-deb --target all --parallel 16 - ctest --test-dir build-samples-deb -LE "${${{ matrix.system.gpu }}_EXCLUDE_LABEL_REGEX}" -E "${${{ matrix.system.gpu }}_EXCLUDE_TESTS_REGEX}" --output-on-failure - ctest --test-dir build-tests-deb -LE "${${{ matrix.system.gpu }}_EXCLUDE_LABEL_REGEX}" -E "${${{ matrix.system.gpu }}_EXCLUDE_TESTS_REGEX}" --output-on-failure + ctest --test-dir build-samples-deb -LE "${${{ matrix.runner }}_EXCLUDE_LABEL_REGEX}" -E "${${{ matrix.runner }}_EXCLUDE_TESTS_REGEX}" --output-on-failure + ctest --test-dir build-tests-deb -LE "${${{ matrix.runner }}_EXCLUDE_LABEL_REGEX}" -E "${${{ matrix.runner }}_EXCLUDE_TESTS_REGEX}" --output-on-failure - name: Archive production artifacts - if: ${{ github.event_name == 'workflow_dispatch' && contains(matrix.system.gpu, env.CORE_EXT_RUNNER) }} + if: ${{ github.event_name == 'workflow_dispatch' && contains(matrix.runner, env.CORE_EXT_RUNNER) }} uses: actions/upload-artifact@v4 with: name: installers-deb diff --git a/.github/workflows/rocprofiler-sdk-formatting.yml b/.github/workflows/rocprofiler-sdk-formatting.yml index 55627950bc..741f746f03 100644 --- a/.github/workflows/rocprofiler-sdk-formatting.yml +++ b/.github/workflows/rocprofiler-sdk-formatting.yml @@ -34,15 +34,20 @@ jobs: id: extract_branch - name: Install dependencies + working-directory: projects/rocprofiler-sdk run: | sudo apt-get update sudo apt-get install -y python3-pip - python3 -m pip install -U cmake-format + python3 -m pip install -r requirements.txt - name: Run cmake-format + working-directory: projects/rocprofiler-sdk run: | + FORMAT_FILES=$(find . -type f | egrep 'CMakeLists.txt|\.cmake$') + command -v cmake-format + cmake-format --version set +e - cmake-format -i $(find . -type f | egrep 'CMakeLists.txt|\.cmake$') + cmake-format -i ${FORMAT_FILES} if [ $(git diff | wc -l) -ne 0 ]; then echo -e "\nError! CMake code not formatted. Run cmake-format...\n" echo -e "\nFiles:\n" @@ -53,8 +58,7 @@ jobs: fi source: - runs-on: ubuntu-latest - container: rocm/dev-ubuntu-22.04:latest + runs-on: ubuntu-22.04 env: ACCESS_TOKEN: ${{ secrets.GITHUB_TOKEN }} @@ -63,24 +67,31 @@ jobs: with: sparse-checkout: projects/rocprofiler-sdk - - name: Install dependencies - run: | - DISTRIB_CODENAME=$(cat /etc/lsb-release | grep DISTRIB_CODENAME | awk -F '=' '{print $NF}') - sudo apt-get update - sudo apt-get install -y software-properties-common python3 python3-pip build-essential - sudo apt install -y wget curl clang-format-11 - - name: Extract branch name shell: bash run: | echo "branch=${GITHUB_HEAD_REF:-${GITHUB_HEAD_REF#refs/heads/}}" >> $GITHUB_OUTPUT id: extract_branch - - name: Run clang-format + - name: Install dependencies + working-directory: projects/rocprofiler-sdk run: | + DISTRIB_CODENAME=$(cat /etc/lsb-release | grep DISTRIB_CODENAME | awk -F '=' '{print $NF}') + sudo apt-get update + sudo apt-get install -y software-properties-common python3 python3-pip + python3 -m pip install -r requirements.txt + + - name: Run clang-format + working-directory: projects/rocprofiler-sdk + run: | + FORMAT_FILES=$(find samples source tests benchmark -type f | egrep '\.(h|hpp|hh|c|cc|cpp)(|\.in)$') + command -v git + command -v clang-format + git --version + clang-format --version set +e - FILES=$(find samples source tests benchmark -type f | egrep '\.(h|hpp|hh|c|cc|cpp)(|\.in)$') - FORMAT_OUT=$(clang-format-11 -i ${FILES}) + FORMAT_OUT=$(clang-format -i ${FORMAT_FILES}) + git status if [ $(git diff | wc -l) -ne 0 ]; then echo -e "\nError! Code not formatted. Run clang-format (version 11)...\n" echo -e "\nFiles:\n" @@ -115,13 +126,14 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies + working-directory: projects/rocprofiler-sdk run: | python -m pip install --upgrade pip - python -m pip install black + python -m pip install -r requirements.txt - name: black format + working-directory: projects/rocprofiler-sdk run: | - cd projects/rocprofiler-sdk black . if [ $(git diff | wc -l) -ne 0 ]; then echo -e "\nError! Python code not formatted. Run black...\n" @@ -140,8 +152,15 @@ jobs: with: sparse-checkout: projects/rocprofiler-sdk + - name: Extract branch name + shell: bash + run: | + echo "branch=${GITHUB_HEAD_REF:-${GITHUB_HEAD_REF#refs/heads/}}" >> $GITHUB_OUTPUT + id: extract_branch + - name: Find missing new line shell: bash + working-directory: projects/rocprofiler-sdk run: | OUTFILE=missing_newline.txt for i in $(find source tests samples benchmark docker cmake -type f | egrep -v '\.(bin|png|csv)$|source/docs/_(build|doxygen)'); do VAL=$(tail -c 1 ${i}); if [ -n "${VAL}" ]; then echo "- ${i}" >> ${OUTFILE}; fi; done diff --git a/.github/workflows/rocprofiler-sdk-python.yml b/.github/workflows/rocprofiler-sdk-python.yml index 256afe8f6e..fc7c46ff5f 100644 --- a/.github/workflows/rocprofiler-sdk-python.yml +++ b/.github/workflows/rocprofiler-sdk-python.yml @@ -31,13 +31,13 @@ jobs: with: python-version: ${{ matrix.python-version }} - name: Install dependencies + working-directory: projects/rocprofiler-sdk run: | - cd projects/rocprofiler-sdk python -m pip install --upgrade pip if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - name: Lint with flake8 + working-directory: projects/rocprofiler-sdk run: | - cd projects/rocprofiler-sdk # stop the build if there are Python syntax errors or undefined names flake8 source --count --show-source --statistics --select=E9,F63,F7,F82 # flake8 options are defined in .flake8 diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/source/csv.cpp b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/csv.cpp index 085d6d9ffd..722edcbc23 100644 --- a/projects/rocprofiler-sdk/source/lib/python/rocpd/source/csv.cpp +++ b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/csv.cpp @@ -319,7 +319,7 @@ write_memory_allocation_csv( memory_alloc_gen, [](CsvManager& cm, CsvType type, const rocpd::types::memory_allocation& malloc) { std::string normalized_type = malloc.type; - if (normalized_type == "ALLOC") + if(normalized_type == "ALLOC") { normalized_type = "ALLOCATE"; } diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/tests/metrics_test.h b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/tests/metrics_test.h index ee533a6f2b..b14b4da3b3 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/tests/metrics_test.h +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/tests/metrics_test.h @@ -189,14 +189,13 @@ static const std::unordered_map", - "Count number of waves sent <32 active threads sent to SQs. " - "This value represents the number of waves that an each individual SIMD has enqueued during " - "the collection timeframe (for dispatch profiling this is the timeframe of kernel execution, " - "for agent profiling it is the timeframe between start_context and read counter data) with " - "less than 32 threads. A sum of all SQ_WAVES_LT_32 values will give the total number of " - "waves with 32 threads enqueued during the collection timeframe by the application. " - "Returns one value per-SE (aggregates of SIMD values). " - "Useful for checking for wavefront occupancy."}, + "Count number of waves sent <32 active threads sent to SQs. This value represents the " + "number of waves that an each individual SIMD has enqueued during the collection timeframe " + "(for dispatch profiling this is the timeframe of kernel execution, for agent profiling it " + "is the timeframe between start_context and read counter data) with less than 32 threads. A " + "sum of all SQ_WAVES_LT_32 values will give the total number of waves with 32 threads " + "enqueued during the collection timeframe by the application. Returns one value per-SE " + "(aggregates of SIMD values). Useful for checking for wavefront occupancy."}, {"TCC_ALL_TC_OP_WB_WRITEBACK", "TCC", "73", @@ -222,11 +221,7 @@ static const std::unordered_map", "Number of cache hits. (per-SQ, per-Bank, nondeterministic)"}, - {"CPC_CPC_TCIU_IDLE", - "CPC", - "29", - "", - "CPC TCIU interface Idle."}, + {"CPC_CPC_TCIU_IDLE", "CPC", "29", "", "CPC TCIU interface Idle."}, {"SPI_CSN_WAVE", "SPI", "52", @@ -276,11 +271,7 @@ static const std::unordered_map", "Arb cycles with CSn req and no CSn alloc. Source is RA0"}, - {"CPC_ME1_DC0_SPI_BUSY", - "CPC", - "33", - "", - "CPC Me1 Processor Busy."}, + {"CPC_ME1_DC0_SPI_BUSY", "CPC", "33", "", "CPC Me1 Processor Busy."}, {"SQ_WAVES_RESTORED", "SQ", "159", @@ -292,11 +283,7 @@ static const std::unordered_map", - "CPF TCIU interface Idle."}, + {"CPF_CPF_TCIU_IDLE", "CPF", "27", "", "CPF TCIU interface Idle."}, {"TCP_TCC_ATOMIC_WITH_RET_REQ", "TCP", "71", @@ -357,16 +344,8 @@ static const std::unordered_map", "One of the Compute UTCL1s is stalled waiting on translation, XNACK or PENDING " "response."}, - {"TCC_CYCLE", - "TCC", - "1", - "", - "Number of cycles. Not windowable."}, - {"TCP_GATE_EN2", - "TCP", - "1", - "", - "TCP core clocks are turned on. Not Windowed."}, + {"TCC_CYCLE", "TCC", "1", "", "Number of cycles. Not windowable."}, + {"TCP_GATE_EN2", "TCP", "1", "", "TCP core clocks are turned on. Not Windowed."}, {"TCC_WRITEBACK", "TCC", "22", @@ -393,11 +372,7 @@ static const std::unordered_map", "Total write requests with UC mtype from this TCP to all TCCs"}, - {"TCP_UTCL1_TRANSLATION_MISS", - "TCP", - "48", - "", - "Total utcl1 translation misses"}, + {"TCP_UTCL1_TRANSLATION_MISS", "TCP", "48", "", "Total utcl1 translation misses"}, {"GRBM_TA_BUSY", "GRBM", "13", @@ -445,11 +420,7 @@ static const std::unordered_map", "Number of wave-cycles spent waiting for anything (per-simd, nondeterministic). " "Units in quad-cycles(4 cycles)"}, - {"SQ_CYCLES", - "SQ", - "2", - "", - "Clock cycles. Value is returned per-SIMD."}, + {"SQ_CYCLES", "SQ", "2", "", "Clock cycles. Value is returned per-SIMD."}, {"GRBM_SPI_BUSY", "GRBM", "11", @@ -492,11 +463,7 @@ static const std::unordered_map", "Total read requests with NC mtype from this TCP to all TCCs"}, - {"TCP_TD_TCP_STALL_CYCLES", - "TCP", - "7", - "", - "TD stalls TCP"}, + {"TCP_TD_TCP_STALL_CYCLES", "TCP", "7", "", "TD stalls TCP"}, {"SQ_INSTS_SENDMSG", "SQ", "40", @@ -532,11 +499,7 @@ static const std::unordered_map", "Number of cycles a EA write request was stalled because the interface was out of " "DRAM credits."}, - {"TCC_WRITE", - "TCC", - "13", - "", - "Number of write requests."}, + {"TCC_WRITE", "TCC", "13", "", "Number of write requests."}, {"SPI_RA_VGPR_SIMD_FULL_CSN", "SPI", "109", @@ -683,11 +646,7 @@ static const std::unordered_map", - "Number of buffer cycles issued to TC."}, + {"TA_BUFFER_TOTAL_CYCLES", "TA", "49", "", "Number of buffer cycles issued to TC."}, {"SQ_WAIT_INST_ANY", "SQ", "61", @@ -708,11 +667,7 @@ static const std::unordered_map", "Number of TCC/EA write requests (either 32-byte of 64-byte) destined for DRAM (MC)."}, - {"TCP_TCR_TCP_STALL_CYCLES", - "TCP", - "8", - "", - "TCR stalls TCP_TCR_req interface"}, + {"TCP_TCR_TCP_STALL_CYCLES", "TCP", "8", "", "TCR stalls TCP_TCR_req interface"}, {"TCP_TCC_RW_READ_REQ", "TCP", "85", @@ -745,73 +700,37 @@ static const std::unordered_map", "Number of instruction fetch requests from L1I (instruction) cache. This is a value " "returned per-SIMD."}, - {"TCP_TCC_READ_REQ", - "TCP", - "69", - "", - "Total read requests from TCP to all TCCs"}, + {"TCP_TCC_READ_REQ", "TCP", "69", "", "Total read requests from TCP to all TCCs"}, {"SQC_DCACHE_REQ", "SQ", "290", "", "Number of requests (post-bank-serialization). (per-SQ, per-Bank)"}, - {"CPC_CPC_STAT_STALL", - "CPC", - "27", - "", - "CPC Stalled."}, - {"TCP_GATE_EN1", - "TCP", - "0", - "", - "TCP interface clocks are turned on. Not Windowed."}, - {"TCP_PENDING_STALL_CYCLES", - "TCP", - "22", - "", - "Stall due to data pending from L2"}, + {"CPC_CPC_STAT_STALL", "CPC", "27", "", "CPC Stalled."}, + {"TCP_GATE_EN1", "TCP", "0", "", "TCP interface clocks are turned on. Not Windowed."}, + {"TCP_PENDING_STALL_CYCLES", "TCP", "22", "", "Stall due to data pending from L2"}, {"SQC_DCACHE_MISSES_DUPLICATE", "SQ", "293", "", "Number of misses that were duplicates (access to a non-resident, miss pending CL). " "(per-SQ, per-Bank, nondeterministic)"}, - {"CPF_CPF_STAT_IDLE", - "CPF", - "24", - "", - "CPF Idle."}, - {"TCP_VOLATILE", - "TCP", - "28", - "", - "Total number of L1 volatile pixels/buffers from TA"}, - {"CPC_CPC_TCIU_BUSY", - "CPC", - "28", - "", - "CPC TCIU interface Busy."}, + {"CPF_CPF_STAT_IDLE", "CPF", "24", "", "CPF Idle."}, + {"TCP_VOLATILE", "TCP", "28", "", "Total number of L1 volatile pixels/buffers from TA"}, + {"CPC_CPC_TCIU_BUSY", "CPC", "28", "", "CPC TCIU interface Busy."}, {"SQC_DCACHE_REQ_READ_2", "SQ", "324", "", "Number of constant cache 2 dw read requests. (per-SQ)"}, - {"CPC_CPC_STAT_BUSY", - "CPC", - "25", - "", - "CPC Busy."}, + {"CPC_CPC_STAT_BUSY", "CPC", "25", "", "CPC Busy."}, {"TCP_TCP_LATENCY", "TCP", "65", "", "Total TCP wave latency (from first clock of wave entering to first clock of wave " "leaving), divide by TA_TCP_STATE_READ to avg wave latency"}, - {"TCP_UTCL1_TRANSLATION_HIT", - "TCP", - "49", - "", - "Total utcl1 translation hits"}, + {"TCP_UTCL1_TRANSLATION_HIT", "TCP", "49", "", "Total utcl1 translation hits"}, {"SQ_INST_LEVEL_SMEM", "SQ", "43", @@ -841,11 +760,7 @@ static const std::unordered_map", "Count the wavefronts with opcode = load, include atomics and store."}, - {"GRBM_EA_BUSY", - "GRBM", - "35", - "", - "The Efficiency Arbiter (EA) block is busy."}, + {"GRBM_EA_BUSY", "GRBM", "35", "", "The Efficiency Arbiter (EA) block is busy."}, {"SPI_RA_WVLIM_STALL_CSN", "SPI", "133", @@ -862,26 +777,14 @@ static const std::unordered_map", "TD is processing or waiting for data. Perf_Windowing not supported for this " "counter."}, - {"SQC_ICACHE_REQ", - "SQ", - "270", - "", - "Number of requests. (per-SQ, per-Bank)"}, - {"TCC_ATOMIC", - "TCC", - "14", - "", - "Number of atomic requests of all types."}, + {"SQC_ICACHE_REQ", "SQ", "270", "", "Number of requests. (per-SQ, per-Bank)"}, + {"TCC_ATOMIC", "TCC", "14", "", "Number of atomic requests of all types."}, {"TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES", "TCP", "13", "", "Tagram conflict stall on an atomic"}, - {"CPF_CPF_STAT_BUSY", - "CPF", - "23", - "", - "CPF Busy."}, + {"CPF_CPF_STAT_BUSY", "CPF", "23", "", "CPF Busy."}, {"TCC_EA0_WRREQ_LEVEL", "TCC", "35", @@ -968,11 +871,7 @@ static const std::unordered_map", - "Number of atomic requests. (per-SQ, per-Bank)"}, + {"SQC_DCACHE_ATOMIC", "SQ", "298", "", "Number of atomic requests. (per-SQ, per-Bank)"}, {"TCC_EA0_RDREQ_GMI_CREDIT_STALL", "TCC", "42", @@ -1021,11 +920,7 @@ static const std::unordered_map", "One of the UTCL1s is stalled waiting on translation, XNACK or PENDING response."}, - {"TCC_PROBE", - "TCC", - "9", - "", - "Number of probe requests. Not windowable."}, + {"TCC_PROBE", "TCC", "9", "", "Number of probe requests. Not windowable."}, {"TA_BUFFER_ATOMIC_WAVEFRONTS", "TA", "47", @@ -1042,16 +937,8 @@ static const std::unordered_map", "Number of flat opcode wavfronts processed by the TA."}, - {"TA_TOTAL_WAVEFRONTS", - "TA", - "32", - "", - "Total number of wavefronts processed by TA."}, - {"CPC_CPC_STAT_IDLE", - "CPC", - "26", - "", - "CPC Idle."}, + {"TA_TOTAL_WAVEFRONTS", "TA", "32", "", "Total number of wavefronts processed by TA."}, + {"CPC_CPC_STAT_IDLE", "CPC", "26", "", "CPC Idle."}, {"CPC_CPC_UTCL2IU_STALL", "CPC", "32", @@ -1099,31 +986,15 @@ static const std::unordered_map", "Any of the Texture Cache Blocks (TCP/TCI/TCA/TCC) are busy."}, - {"GRBM_CPC_BUSY", - "GRBM", - "30", - "", - "The Command Processor Compute (CPC) is busy."}, - {"TCP_UTCL1_PERMISSION_MISS", - "TCP", - "50", - "", - "Total utcl1 permission misses"}, + {"GRBM_CPC_BUSY", "GRBM", "30", "", "The Command Processor Compute (CPC) is busy."}, + {"TCP_UTCL1_PERMISSION_MISS", "TCP", "50", "", "Total utcl1 permission misses"}, {"SPI_RA_BULKY_CU_FULL_CSN", "SPI", "125", "", "Sum of CU where BULKY can't take csn wave when !fits. Source is RA0"}, - {"TCP_TA_TCP_STATE_READ", - "TCP", - "27", - "", - "Number of state reads"}, - {"TCP_TCC_WRITE_REQ", - "TCP", - "70", - "", - "Total write requests from TCP to all TCCs"}, + {"TCP_TA_TCP_STATE_READ", "TCP", "27", "", "Number of state reads"}, + {"TCP_TCC_WRITE_REQ", "TCP", "70", "", "Total write requests from TCP to all TCCs"}, {"TCP_TCC_RW_ATOMIC_REQ", "TCP", "87", @@ -1181,11 +1052,7 @@ static const std::unordered_map", "Number of cache hits. (per-SQ, per-Bank, nondeterministic)"}, - {"TCA_CYCLE", - "TCA", - "1", - "", - "Number of cycles. Not windowable."}, + {"TCA_CYCLE", "TCA", "1", "", "Number of cycles. Not windowable."}, {"SPI_CSN_BUSY", "SPI", "48", @@ -1238,11 +1105,7 @@ static const std::unordered_map", "Total number of TC requests that were issued by instruction and constant caches. " "(No-Masking, nondeterministic)"}, - {"CPF_CPF_STAT_STALL", - "CPF", - "25", - "", - "CPF Stalled."}, + {"CPF_CPF_STAT_STALL", "CPF", "25", "", "CPF Stalled."}, {"TCC_ALL_TC_OP_INV_EVICT", "TCC", "80", @@ -1268,16 +1131,8 @@ static const std::unordered_map", - "CPC UTCL2 interface Busy."}, - {"TCP_UTCL1_REQUEST", - "TCP", - "47", - "", - "Total CLIENT_UTCL1 NORMAL requests"}, + {"CPC_CPC_UTCL2IU_BUSY", "CPC", "30", "", "CPC UTCL2 interface Busy."}, + {"TCP_UTCL1_REQUEST", "TCP", "47", "", "Total CLIENT_UTCL1 NORMAL requests"}, {"CPF_CPF_TCIU_STALL", "CPF", "28", @@ -1296,11 +1151,7 @@ static const std::unordered_map", "Sum of CU where LDS can't take csn wave when !fits. Source is RA0"}, - {"TD_ATOMIC_WAVEFRONT", - "TD", - "26", - "", - "Count the wavefronts with opcode = atomic."}, + {"TD_ATOMIC_WAVEFRONT", "TD", "26", "", "Count the wavefronts with opcode = atomic."}, {"SQ_INSTS_EXP_GDS", "SQ", "38", @@ -1316,22 +1167,14 @@ static const std::unordered_map", "Valid request stalled TC request interface (no-credits). (No-Masking, " "nondeterministic, unwindowed)"}, - {"CPF_CPF_TCIU_BUSY", - "CPF", - "26", - "", - "CPF TCIU interface Busy."}, + {"CPF_CPF_TCIU_BUSY", "CPF", "26", "", "CPF TCIU interface Busy."}, {"TCC_EA0_WRREQ_GMI_CREDIT_STALL", "TCC", "32", "", "Number of cycles a EA write request was stalled because the interface was out of " "GMI credits."}, - {"GRBM_CPF_BUSY", - "GRBM", - "31", - "", - "The Command Processor Fetchers (CPF) is busy."}, + {"GRBM_CPF_BUSY", "GRBM", "31", "", "The Command Processor Fetchers (CPF) is busy."}, {"SQ_WAVES_LT_48", "SQ", "8", @@ -1350,11 +1193,7 @@ static const std::unordered_map", "Number of transactions going over the TC_EA_wrreq interface that are actually " "atomic requests."}, - {"TD_TC_STALL", - "TD", - "15", - "", - "TD is stalled waiting for TC data."}, + {"TD_TC_STALL", "TD", "15", "", "TD is stalled waiting for TC data."}, {"SPI_RA_TGLIM_CU_FULL_CSN", "SPI", "127", @@ -1370,11 +1209,7 @@ static const std::unordered_map", "Total number of atomic without return pixels/buffers from TA"}, - {"CPC_ME1_BUSY_FOR_PACKET_DECODE", - "CPC", - "13", - "", - "Me1 busy for packet decode."}, + {"CPC_ME1_BUSY_FOR_PACKET_DECODE", "CPC", "13", "", "Me1 busy for packet decode."}, {"SQ_INSTS", "SQ", "25", @@ -1388,11 +1223,7 @@ static const std::unordered_map", "Number of evictions due to requests that are not invalidate or probe requests."}, - {"CPC_CPC_UTCL2IU_IDLE", - "CPC", - "31", - "", - "CPC UTCL2 interface Idle."}, + {"CPC_CPC_UTCL2IU_IDLE", "CPC", "31", "", "CPC UTCL2 interface Idle."}, {"TCC_REQ", "TCC", "3", @@ -1409,11 +1240,7 @@ static const std::unordered_map", - "Count the wavefronts with opcode = store."}, + {"TD_STORE_WAVEFRONT", "TD", "27", "", "Count the wavefronts with opcode = store."}, {"TA_BUFFER_COALESCED_READ_CYCLES", "TA", "52", @@ -1465,11 +1292,7 @@ static const std::unordered_map", "Number of cycles client191 sent a request to this TCC."}, - {"TCC_BUBBLE", - "TCC", - "56", - "", - "Number of 128-byte read requests sent to EA."}}}}; + {"TCC_BUBBLE", "TCC", "56", "", "Number of 128-byte read requests sent to EA."}}}}; static const std::unordered_map>> derived_gfx908 = {{"gfx908", @@ -2153,11 +1976,7 @@ static const std::unordered_map