[rocprofiler-sdk] Fix formatting, linting, and CI workflows (#345)

* [rocprofiler-sdk] Fix formatting and lint workflows

- several formatting workflows were silently failing when listing files

* format metrics_test.h

* Improve formatting job robustness

* Source formatting workflow does not use container

* Use PyPI clang-format

* Format rocpd/source/csv.cpp source

* Fix rocprofiler-sdk CI workflow

- fix invalid context access

* Update run-ci.py

- fix ctest_update

* Update run-ci.py

- handle old checkout in ROCm/rocprofiler-sdk
Bu işleme şunda yer alıyor:
Jonathan R. Madsen
2025-08-14 00:02:23 -05:00
işlemeyi yapan: GitHub
ebeveyn e28900793b
işleme 9df2c1ec68
6 değiştirilmiş dosya ile 111 ekleme ve 269 silme
+15 -16
Dosyayı Görüntüle
@@ -101,7 +101,7 @@ jobs:
ls -la
- name: Enable PC Sampling
if: ${{ contains(matrix.system.gpu, 'mi200') || contains(matrix.system.gpu, 'mi300a') }}
if: ${{ contains(matrix.runner, 'mi200') || contains(matrix.runner, 'mi300a') }}
shell: bash
working-directory: projects/rocprofiler-sdk
run:
@@ -113,41 +113,40 @@ jobs:
working-directory: projects/rocprofiler-sdk
run:
python3 ./source/scripts/run-ci.py -B build
--name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.system.os }}-${{ matrix.system.gpu }}-core
--name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}-${{ matrix.runner }}-core
--build-jobs 16
--site ${RUNNER_HOSTNAME}
--gpu-targets ${{ env.GPU_TARGETS }}
--run-attempt ${{ github.run_attempt }}
${{ matrix.system.ci-flags }}
--
-DROCPROFILER_DEP_ROCMCORE=ON
-DROCPROFILER_BUILD_DOCS=OFF
-DCMAKE_BUILD_TYPE=${{ matrix.system.build-type }}
-DCMAKE_BUILD_TYPE=${{ matrix.build-type }}
-DCMAKE_INSTALL_PREFIX=/opt/rocprofiler-sdk
-DCPACK_GENERATOR='DEB;RPM;TGZ'
-DCPACK_PACKAGING_INSTALL_PREFIX="$(realpath /opt/rocm)"
-DPython3_EXECUTABLE=$(which python3)
${{ env.GLOBAL_CMAKE_OPTIONS }}
--
-LE "${${{ matrix.system.gpu }}_EXCLUDE_LABEL_REGEX}"
-E "${${{ matrix.system.gpu }}_EXCLUDE_TESTS_REGEX}"
-LE "${${{ matrix.runner }}_EXCLUDE_LABEL_REGEX}"
-E "${${{ matrix.runner }}_EXCLUDE_TESTS_REGEX}"
- name: Install
if: ${{ contains(matrix.system.gpu, env.CORE_EXT_RUNNER) }}
if: ${{ contains(matrix.runner, env.CORE_EXT_RUNNER) }}
timeout-minutes: 10
working-directory: projects/rocprofiler-sdk
run:
cmake --build build --target install --parallel 16
- name: Build Packaging
if: ${{ contains(matrix.system.gpu, env.CORE_EXT_RUNNER) }}
if: ${{ contains(matrix.runner, env.CORE_EXT_RUNNER) }}
timeout-minutes: 10
working-directory: projects/rocprofiler-sdk
run:
cmake --build build --target package --parallel 16
- name: Test Install Build
if: ${{ contains(matrix.system.gpu, env.CORE_EXT_RUNNER) }}
if: ${{ contains(matrix.runner, env.CORE_EXT_RUNNER) }}
timeout-minutes: 20
shell: bash
working-directory: projects/rocprofiler-sdk
@@ -157,11 +156,11 @@ jobs:
export LD_LIBRARY_PATH=/opt/rocprofiler-sdk/lib:${LD_LIBRARY_PATH}
cmake --build build-samples --target all --parallel 16
cmake --build build-tests --target all --parallel 16
ctest --test-dir build-samples -LE "${${{ matrix.system.gpu }}_EXCLUDE_LABEL_REGEX}" -E "${${{ matrix.system.gpu }}_EXCLUDE_TESTS_REGEX}" --output-on-failure
ctest --test-dir build-tests -LE "${${{ matrix.system.gpu }}_EXCLUDE_LABEL_REGEX}" -E "${${{ matrix.system.gpu }}_EXCLUDE_TESTS_REGEX}" --output-on-failure
ctest --test-dir build-samples -LE "${${{ matrix.runner }}_EXCLUDE_LABEL_REGEX}" -E "${${{ matrix.runner }}_EXCLUDE_TESTS_REGEX}" --output-on-failure
ctest --test-dir build-tests -LE "${${{ matrix.runner }}_EXCLUDE_LABEL_REGEX}" -E "${${{ matrix.runner }}_EXCLUDE_TESTS_REGEX}" --output-on-failure
- name: Install Packages
if: ${{ contains(matrix.system.gpu, env.CORE_EXT_RUNNER) }}
if: ${{ contains(matrix.runner, env.CORE_EXT_RUNNER) }}
timeout-minutes: 5
shell: bash
working-directory: projects/rocprofiler-sdk
@@ -174,7 +173,7 @@ jobs:
for i in $(ls -S ./build/rocprofiler-sdk*.deb | egrep -v 'roctx|rocpd'); do dpkg --force-all -i ${i}; done;
- name: Test Installed Packages
if: ${{ contains(matrix.system.gpu, env.CORE_EXT_RUNNER) }}
if: ${{ contains(matrix.runner, env.CORE_EXT_RUNNER) }}
timeout-minutes: 20
shell: bash
working-directory: projects/rocprofiler-sdk
@@ -183,11 +182,11 @@ jobs:
CMAKE_PREFIX_PATH=/opt/rocm cmake -B build-tests-deb -DGPU_TARGETS="gfx942" /opt/rocm/share/rocprofiler-sdk/tests
cmake --build build-samples-deb --target all --parallel 16
cmake --build build-tests-deb --target all --parallel 16
ctest --test-dir build-samples-deb -LE "${${{ matrix.system.gpu }}_EXCLUDE_LABEL_REGEX}" -E "${${{ matrix.system.gpu }}_EXCLUDE_TESTS_REGEX}" --output-on-failure
ctest --test-dir build-tests-deb -LE "${${{ matrix.system.gpu }}_EXCLUDE_LABEL_REGEX}" -E "${${{ matrix.system.gpu }}_EXCLUDE_TESTS_REGEX}" --output-on-failure
ctest --test-dir build-samples-deb -LE "${${{ matrix.runner }}_EXCLUDE_LABEL_REGEX}" -E "${${{ matrix.runner }}_EXCLUDE_TESTS_REGEX}" --output-on-failure
ctest --test-dir build-tests-deb -LE "${${{ matrix.runner }}_EXCLUDE_LABEL_REGEX}" -E "${${{ matrix.runner }}_EXCLUDE_TESTS_REGEX}" --output-on-failure
- name: Archive production artifacts
if: ${{ github.event_name == 'workflow_dispatch' && contains(matrix.system.gpu, env.CORE_EXT_RUNNER) }}
if: ${{ github.event_name == 'workflow_dispatch' && contains(matrix.runner, env.CORE_EXT_RUNNER) }}
uses: actions/upload-artifact@v4
with:
name: installers-deb
+35 -16
Dosyayı Görüntüle
@@ -34,15 +34,20 @@ jobs:
id: extract_branch
- name: Install dependencies
working-directory: projects/rocprofiler-sdk
run: |
sudo apt-get update
sudo apt-get install -y python3-pip
python3 -m pip install -U cmake-format
python3 -m pip install -r requirements.txt
- name: Run cmake-format
working-directory: projects/rocprofiler-sdk
run: |
FORMAT_FILES=$(find . -type f | egrep 'CMakeLists.txt|\.cmake$')
command -v cmake-format
cmake-format --version
set +e
cmake-format -i $(find . -type f | egrep 'CMakeLists.txt|\.cmake$')
cmake-format -i ${FORMAT_FILES}
if [ $(git diff | wc -l) -ne 0 ]; then
echo -e "\nError! CMake code not formatted. Run cmake-format...\n"
echo -e "\nFiles:\n"
@@ -53,8 +58,7 @@ jobs:
fi
source:
runs-on: ubuntu-latest
container: rocm/dev-ubuntu-22.04:latest
runs-on: ubuntu-22.04
env:
ACCESS_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -63,24 +67,31 @@ jobs:
with:
sparse-checkout: projects/rocprofiler-sdk
- name: Install dependencies
run: |
DISTRIB_CODENAME=$(cat /etc/lsb-release | grep DISTRIB_CODENAME | awk -F '=' '{print $NF}')
sudo apt-get update
sudo apt-get install -y software-properties-common python3 python3-pip build-essential
sudo apt install -y wget curl clang-format-11
- name: Extract branch name
shell: bash
run: |
echo "branch=${GITHUB_HEAD_REF:-${GITHUB_HEAD_REF#refs/heads/}}" >> $GITHUB_OUTPUT
id: extract_branch
- name: Run clang-format
- name: Install dependencies
working-directory: projects/rocprofiler-sdk
run: |
DISTRIB_CODENAME=$(cat /etc/lsb-release | grep DISTRIB_CODENAME | awk -F '=' '{print $NF}')
sudo apt-get update
sudo apt-get install -y software-properties-common python3 python3-pip
python3 -m pip install -r requirements.txt
- name: Run clang-format
working-directory: projects/rocprofiler-sdk
run: |
FORMAT_FILES=$(find samples source tests benchmark -type f | egrep '\.(h|hpp|hh|c|cc|cpp)(|\.in)$')
command -v git
command -v clang-format
git --version
clang-format --version
set +e
FILES=$(find samples source tests benchmark -type f | egrep '\.(h|hpp|hh|c|cc|cpp)(|\.in)$')
FORMAT_OUT=$(clang-format-11 -i ${FILES})
FORMAT_OUT=$(clang-format -i ${FORMAT_FILES})
git status
if [ $(git diff | wc -l) -ne 0 ]; then
echo -e "\nError! Code not formatted. Run clang-format (version 11)...\n"
echo -e "\nFiles:\n"
@@ -115,13 +126,14 @@ jobs:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
working-directory: projects/rocprofiler-sdk
run: |
python -m pip install --upgrade pip
python -m pip install black
python -m pip install -r requirements.txt
- name: black format
working-directory: projects/rocprofiler-sdk
run: |
cd projects/rocprofiler-sdk
black .
if [ $(git diff | wc -l) -ne 0 ]; then
echo -e "\nError! Python code not formatted. Run black...\n"
@@ -140,8 +152,15 @@ jobs:
with:
sparse-checkout: projects/rocprofiler-sdk
- name: Extract branch name
shell: bash
run: |
echo "branch=${GITHUB_HEAD_REF:-${GITHUB_HEAD_REF#refs/heads/}}" >> $GITHUB_OUTPUT
id: extract_branch
- name: Find missing new line
shell: bash
working-directory: projects/rocprofiler-sdk
run: |
OUTFILE=missing_newline.txt
for i in $(find source tests samples benchmark docker cmake -type f | egrep -v '\.(bin|png|csv)$|source/docs/_(build|doxygen)'); do VAL=$(tail -c 1 ${i}); if [ -n "${VAL}" ]; then echo "- ${i}" >> ${OUTFILE}; fi; done
+2 -2
Dosyayı Görüntüle
@@ -31,13 +31,13 @@ jobs:
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
working-directory: projects/rocprofiler-sdk
run: |
cd projects/rocprofiler-sdk
python -m pip install --upgrade pip
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- name: Lint with flake8
working-directory: projects/rocprofiler-sdk
run: |
cd projects/rocprofiler-sdk
# stop the build if there are Python syntax errors or undefined names
flake8 source --count --show-source --statistics --select=E9,F63,F7,F82
# flake8 options are defined in .flake8
+1 -1
Dosyayı Görüntüle
@@ -319,7 +319,7 @@ write_memory_allocation_csv(
memory_alloc_gen,
[](CsvManager& cm, CsvType type, const rocpd::types::memory_allocation& malloc) {
std::string normalized_type = malloc.type;
if (normalized_type == "ALLOC")
if(normalized_type == "ALLOC")
{
normalized_type = "ALLOCATE";
}
+52 -233
Dosyayı Görüntüle
@@ -189,14 +189,13 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
"SQ",
"9",
"<None>",
"Count number of waves sent <32 active threads sent to SQs. "
"This value represents the number of waves that an each individual SIMD has enqueued during "
"the collection timeframe (for dispatch profiling this is the timeframe of kernel execution, "
"for agent profiling it is the timeframe between start_context and read counter data) with "
"less than 32 threads. A sum of all SQ_WAVES_LT_32 values will give the total number of "
"waves with 32 threads enqueued during the collection timeframe by the application. "
"Returns one value per-SE (aggregates of SIMD values). "
"Useful for checking for wavefront occupancy."},
"Count number of waves sent <32 active threads sent to SQs. This value represents the "
"number of waves that an each individual SIMD has enqueued during the collection timeframe "
"(for dispatch profiling this is the timeframe of kernel execution, for agent profiling it "
"is the timeframe between start_context and read counter data) with less than 32 threads. A "
"sum of all SQ_WAVES_LT_32 values will give the total number of waves with 32 threads "
"enqueued during the collection timeframe by the application. Returns one value per-SE "
"(aggregates of SIMD values). Useful for checking for wavefront occupancy."},
{"TCC_ALL_TC_OP_WB_WRITEBACK",
"TCC",
"73",
@@ -222,11 +221,7 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
"291",
"<None>",
"Number of cache hits. (per-SQ, per-Bank, nondeterministic)"},
{"CPC_CPC_TCIU_IDLE",
"CPC",
"29",
"<None>",
"CPC TCIU interface Idle."},
{"CPC_CPC_TCIU_IDLE", "CPC", "29", "<None>", "CPC TCIU interface Idle."},
{"SPI_CSN_WAVE",
"SPI",
"52",
@@ -276,11 +271,7 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
"85",
"<None>",
"Arb cycles with CSn req and no CSn alloc. Source is RA0"},
{"CPC_ME1_DC0_SPI_BUSY",
"CPC",
"33",
"<None>",
"CPC Me1 Processor Busy."},
{"CPC_ME1_DC0_SPI_BUSY", "CPC", "33", "<None>", "CPC Me1 Processor Busy."},
{"SQ_WAVES_RESTORED",
"SQ",
"159",
@@ -292,11 +283,7 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
"counter data). Context saving/restoring is a slow operation and should be limited. "
"High values can also indicate that stalling may be taking place (waiting for free "
"register space). Returns one value per-SE (aggregates of SIMD values)."},
{"CPF_CPF_TCIU_IDLE",
"CPF",
"27",
"<None>",
"CPF TCIU interface Idle."},
{"CPF_CPF_TCIU_IDLE", "CPF", "27", "<None>", "CPF TCIU interface Idle."},
{"TCP_TCC_ATOMIC_WITH_RET_REQ",
"TCP",
"71",
@@ -357,16 +344,8 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
"<None>",
"One of the Compute UTCL1s is stalled waiting on translation, XNACK or PENDING "
"response."},
{"TCC_CYCLE",
"TCC",
"1",
"<None>",
"Number of cycles. Not windowable."},
{"TCP_GATE_EN2",
"TCP",
"1",
"<None>",
"TCP core clocks are turned on. Not Windowed."},
{"TCC_CYCLE", "TCC", "1", "<None>", "Number of cycles. Not windowable."},
{"TCP_GATE_EN2", "TCP", "1", "<None>", "TCP core clocks are turned on. Not Windowed."},
{"TCC_WRITEBACK",
"TCC",
"22",
@@ -393,11 +372,7 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
"79",
"<None>",
"Total write requests with UC mtype from this TCP to all TCCs"},
{"TCP_UTCL1_TRANSLATION_MISS",
"TCP",
"48",
"<None>",
"Total utcl1 translation misses"},
{"TCP_UTCL1_TRANSLATION_MISS", "TCP", "48", "<None>", "Total utcl1 translation misses"},
{"GRBM_TA_BUSY",
"GRBM",
"13",
@@ -445,11 +420,7 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
"<None>",
"Number of wave-cycles spent waiting for anything (per-simd, nondeterministic). "
"Units in quad-cycles(4 cycles)"},
{"SQ_CYCLES",
"SQ",
"2",
"<None>",
"Clock cycles. Value is returned per-SIMD."},
{"SQ_CYCLES", "SQ", "2", "<None>", "Clock cycles. Value is returned per-SIMD."},
{"GRBM_SPI_BUSY",
"GRBM",
"11",
@@ -492,11 +463,7 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
"75",
"<None>",
"Total read requests with NC mtype from this TCP to all TCCs"},
{"TCP_TD_TCP_STALL_CYCLES",
"TCP",
"7",
"<None>",
"TD stalls TCP"},
{"TCP_TD_TCP_STALL_CYCLES", "TCP", "7", "<None>", "TD stalls TCP"},
{"SQ_INSTS_SENDMSG",
"SQ",
"40",
@@ -532,11 +499,7 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
"<None>",
"Number of cycles a EA write request was stalled because the interface was out of "
"DRAM credits."},
{"TCC_WRITE",
"TCC",
"13",
"<None>",
"Number of write requests."},
{"TCC_WRITE", "TCC", "13", "<None>", "Number of write requests."},
{"SPI_RA_VGPR_SIMD_FULL_CSN",
"SPI",
"109",
@@ -683,11 +646,7 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
"The sum of the number of TCC/EA read requests in flight. This is primarily meant "
"for measure average EA read latency. Average read latency = "
"TCC_PERF_SEL_EA_RDREQ_LEVEL/TCC_PERF_SEL_EA_RDREQ."},
{"TA_BUFFER_TOTAL_CYCLES",
"TA",
"49",
"<None>",
"Number of buffer cycles issued to TC."},
{"TA_BUFFER_TOTAL_CYCLES", "TA", "49", "<None>", "Number of buffer cycles issued to TC."},
{"SQ_WAIT_INST_ANY",
"SQ",
"61",
@@ -708,11 +667,7 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
"103",
"<None>",
"Number of TCC/EA write requests (either 32-byte of 64-byte) destined for DRAM (MC)."},
{"TCP_TCR_TCP_STALL_CYCLES",
"TCP",
"8",
"<None>",
"TCR stalls TCP_TCR_req interface"},
{"TCP_TCR_TCP_STALL_CYCLES", "TCP", "8", "<None>", "TCR stalls TCP_TCR_req interface"},
{"TCP_TCC_RW_READ_REQ",
"TCP",
"85",
@@ -745,73 +700,37 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
"<None>",
"Number of instruction fetch requests from L1I (instruction) cache. This is a value "
"returned per-SIMD."},
{"TCP_TCC_READ_REQ",
"TCP",
"69",
"<None>",
"Total read requests from TCP to all TCCs"},
{"TCP_TCC_READ_REQ", "TCP", "69", "<None>", "Total read requests from TCP to all TCCs"},
{"SQC_DCACHE_REQ",
"SQ",
"290",
"<None>",
"Number of requests (post-bank-serialization). (per-SQ, per-Bank)"},
{"CPC_CPC_STAT_STALL",
"CPC",
"27",
"<None>",
"CPC Stalled."},
{"TCP_GATE_EN1",
"TCP",
"0",
"<None>",
"TCP interface clocks are turned on. Not Windowed."},
{"TCP_PENDING_STALL_CYCLES",
"TCP",
"22",
"<None>",
"Stall due to data pending from L2"},
{"CPC_CPC_STAT_STALL", "CPC", "27", "<None>", "CPC Stalled."},
{"TCP_GATE_EN1", "TCP", "0", "<None>", "TCP interface clocks are turned on. Not Windowed."},
{"TCP_PENDING_STALL_CYCLES", "TCP", "22", "<None>", "Stall due to data pending from L2"},
{"SQC_DCACHE_MISSES_DUPLICATE",
"SQ",
"293",
"<None>",
"Number of misses that were duplicates (access to a non-resident, miss pending CL). "
"(per-SQ, per-Bank, nondeterministic)"},
{"CPF_CPF_STAT_IDLE",
"CPF",
"24",
"<None>",
"CPF Idle."},
{"TCP_VOLATILE",
"TCP",
"28",
"<None>",
"Total number of L1 volatile pixels/buffers from TA"},
{"CPC_CPC_TCIU_BUSY",
"CPC",
"28",
"<None>",
"CPC TCIU interface Busy."},
{"CPF_CPF_STAT_IDLE", "CPF", "24", "<None>", "CPF Idle."},
{"TCP_VOLATILE", "TCP", "28", "<None>", "Total number of L1 volatile pixels/buffers from TA"},
{"CPC_CPC_TCIU_BUSY", "CPC", "28", "<None>", "CPC TCIU interface Busy."},
{"SQC_DCACHE_REQ_READ_2",
"SQ",
"324",
"<None>",
"Number of constant cache 2 dw read requests. (per-SQ)"},
{"CPC_CPC_STAT_BUSY",
"CPC",
"25",
"<None>",
"CPC Busy."},
{"CPC_CPC_STAT_BUSY", "CPC", "25", "<None>", "CPC Busy."},
{"TCP_TCP_LATENCY",
"TCP",
"65",
"<None>",
"Total TCP wave latency (from first clock of wave entering to first clock of wave "
"leaving), divide by TA_TCP_STATE_READ to avg wave latency"},
{"TCP_UTCL1_TRANSLATION_HIT",
"TCP",
"49",
"<None>",
"Total utcl1 translation hits"},
{"TCP_UTCL1_TRANSLATION_HIT", "TCP", "49", "<None>", "Total utcl1 translation hits"},
{"SQ_INST_LEVEL_SMEM",
"SQ",
"43",
@@ -841,11 +760,7 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
"25",
"<None>",
"Count the wavefronts with opcode = load, include atomics and store."},
{"GRBM_EA_BUSY",
"GRBM",
"35",
"<None>",
"The Efficiency Arbiter (EA) block is busy."},
{"GRBM_EA_BUSY", "GRBM", "35", "<None>", "The Efficiency Arbiter (EA) block is busy."},
{"SPI_RA_WVLIM_STALL_CSN",
"SPI",
"133",
@@ -862,26 +777,14 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
"<None>",
"TD is processing or waiting for data. Perf_Windowing not supported for this "
"counter."},
{"SQC_ICACHE_REQ",
"SQ",
"270",
"<None>",
"Number of requests. (per-SQ, per-Bank)"},
{"TCC_ATOMIC",
"TCC",
"14",
"<None>",
"Number of atomic requests of all types."},
{"SQC_ICACHE_REQ", "SQ", "270", "<None>", "Number of requests. (per-SQ, per-Bank)"},
{"TCC_ATOMIC", "TCC", "14", "<None>", "Number of atomic requests of all types."},
{"TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES",
"TCP",
"13",
"<None>",
"Tagram conflict stall on an atomic"},
{"CPF_CPF_STAT_BUSY",
"CPF",
"23",
"<None>",
"CPF Busy."},
{"CPF_CPF_STAT_BUSY", "CPF", "23", "<None>", "CPF Busy."},
{"TCC_EA0_WRREQ_LEVEL",
"TCC",
"35",
@@ -968,11 +871,7 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
"SQ_WAVES_LT_16 values will give the total number of waves with 16 threads enqueued "
"during the collection timeframe by the application. Returns one value per-SE "
"(aggregates of SIMD values). Useful for checking for wavefront occupancy."},
{"SQC_DCACHE_ATOMIC",
"SQ",
"298",
"<None>",
"Number of atomic requests. (per-SQ, per-Bank)"},
{"SQC_DCACHE_ATOMIC", "SQ", "298", "<None>", "Number of atomic requests. (per-SQ, per-Bank)"},
{"TCC_EA0_RDREQ_GMI_CREDIT_STALL",
"TCC",
"42",
@@ -1021,11 +920,7 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
"24",
"<None>",
"One of the UTCL1s is stalled waiting on translation, XNACK or PENDING response."},
{"TCC_PROBE",
"TCC",
"9",
"<None>",
"Number of probe requests. Not windowable."},
{"TCC_PROBE", "TCC", "9", "<None>", "Number of probe requests. Not windowable."},
{"TA_BUFFER_ATOMIC_WAVEFRONTS",
"TA",
"47",
@@ -1042,16 +937,8 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
"100",
"<None>",
"Number of flat opcode wavfronts processed by the TA."},
{"TA_TOTAL_WAVEFRONTS",
"TA",
"32",
"<None>",
"Total number of wavefronts processed by TA."},
{"CPC_CPC_STAT_IDLE",
"CPC",
"26",
"<None>",
"CPC Idle."},
{"TA_TOTAL_WAVEFRONTS", "TA", "32", "<None>", "Total number of wavefronts processed by TA."},
{"CPC_CPC_STAT_IDLE", "CPC", "26", "<None>", "CPC Idle."},
{"CPC_CPC_UTCL2IU_STALL",
"CPC",
"32",
@@ -1099,31 +986,15 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
"28",
"<None>",
"Any of the Texture Cache Blocks (TCP/TCI/TCA/TCC) are busy."},
{"GRBM_CPC_BUSY",
"GRBM",
"30",
"<None>",
"The Command Processor Compute (CPC) is busy."},
{"TCP_UTCL1_PERMISSION_MISS",
"TCP",
"50",
"<None>",
"Total utcl1 permission misses"},
{"GRBM_CPC_BUSY", "GRBM", "30", "<None>", "The Command Processor Compute (CPC) is busy."},
{"TCP_UTCL1_PERMISSION_MISS", "TCP", "50", "<None>", "Total utcl1 permission misses"},
{"SPI_RA_BULKY_CU_FULL_CSN",
"SPI",
"125",
"<None>",
"Sum of CU where BULKY can't take csn wave when !fits. Source is RA0"},
{"TCP_TA_TCP_STATE_READ",
"TCP",
"27",
"<None>",
"Number of state reads"},
{"TCP_TCC_WRITE_REQ",
"TCP",
"70",
"<None>",
"Total write requests from TCP to all TCCs"},
{"TCP_TA_TCP_STATE_READ", "TCP", "27", "<None>", "Number of state reads"},
{"TCP_TCC_WRITE_REQ", "TCP", "70", "<None>", "Total write requests from TCP to all TCCs"},
{"TCP_TCC_RW_ATOMIC_REQ",
"TCP",
"87",
@@ -1181,11 +1052,7 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
"271",
"<None>",
"Number of cache hits. (per-SQ, per-Bank, nondeterministic)"},
{"TCA_CYCLE",
"TCA",
"1",
"<None>",
"Number of cycles. Not windowable."},
{"TCA_CYCLE", "TCA", "1", "<None>", "Number of cycles. Not windowable."},
{"SPI_CSN_BUSY",
"SPI",
"48",
@@ -1238,11 +1105,7 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
"<None>",
"Total number of TC requests that were issued by instruction and constant caches. "
"(No-Masking, nondeterministic)"},
{"CPF_CPF_STAT_STALL",
"CPF",
"25",
"<None>",
"CPF Stalled."},
{"CPF_CPF_STAT_STALL", "CPF", "25", "<None>", "CPF Stalled."},
{"TCC_ALL_TC_OP_INV_EVICT",
"TCC",
"80",
@@ -1268,16 +1131,8 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
"The number of cycles needed to send addr and cmd data for VMEM read instructions. "
"This value is returned on a per-SE (aggregate of values in SIMDs in the SE) basis "
"with units in quad-cycles(4 cycles)."},
{"CPC_CPC_UTCL2IU_BUSY",
"CPC",
"30",
"<None>",
"CPC UTCL2 interface Busy."},
{"TCP_UTCL1_REQUEST",
"TCP",
"47",
"<None>",
"Total CLIENT_UTCL1 NORMAL requests"},
{"CPC_CPC_UTCL2IU_BUSY", "CPC", "30", "<None>", "CPC UTCL2 interface Busy."},
{"TCP_UTCL1_REQUEST", "TCP", "47", "<None>", "Total CLIENT_UTCL1 NORMAL requests"},
{"CPF_CPF_TCIU_STALL",
"CPF",
"28",
@@ -1296,11 +1151,7 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
"120",
"<None>",
"Sum of CU where LDS can't take csn wave when !fits. Source is RA0"},
{"TD_ATOMIC_WAVEFRONT",
"TD",
"26",
"<None>",
"Count the wavefronts with opcode = atomic."},
{"TD_ATOMIC_WAVEFRONT", "TD", "26", "<None>", "Count the wavefronts with opcode = atomic."},
{"SQ_INSTS_EXP_GDS",
"SQ",
"38",
@@ -1316,22 +1167,14 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
"<None>",
"Valid request stalled TC request interface (no-credits). (No-Masking, "
"nondeterministic, unwindowed)"},
{"CPF_CPF_TCIU_BUSY",
"CPF",
"26",
"<None>",
"CPF TCIU interface Busy."},
{"CPF_CPF_TCIU_BUSY", "CPF", "26", "<None>", "CPF TCIU interface Busy."},
{"TCC_EA0_WRREQ_GMI_CREDIT_STALL",
"TCC",
"32",
"<None>",
"Number of cycles a EA write request was stalled because the interface was out of "
"GMI credits."},
{"GRBM_CPF_BUSY",
"GRBM",
"31",
"<None>",
"The Command Processor Fetchers (CPF) is busy."},
{"GRBM_CPF_BUSY", "GRBM", "31", "<None>", "The Command Processor Fetchers (CPF) is busy."},
{"SQ_WAVES_LT_48",
"SQ",
"8",
@@ -1350,11 +1193,7 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
"<None>",
"Number of transactions going over the TC_EA_wrreq interface that are actually "
"atomic requests."},
{"TD_TC_STALL",
"TD",
"15",
"<None>",
"TD is stalled waiting for TC data."},
{"TD_TC_STALL", "TD", "15", "<None>", "TD is stalled waiting for TC data."},
{"SPI_RA_TGLIM_CU_FULL_CSN",
"SPI",
"127",
@@ -1370,11 +1209,7 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
"39",
"<None>",
"Total number of atomic without return pixels/buffers from TA"},
{"CPC_ME1_BUSY_FOR_PACKET_DECODE",
"CPC",
"13",
"<None>",
"Me1 busy for packet decode."},
{"CPC_ME1_BUSY_FOR_PACKET_DECODE", "CPC", "13", "<None>", "Me1 busy for packet decode."},
{"SQ_INSTS",
"SQ",
"25",
@@ -1388,11 +1223,7 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
"74",
"<None>",
"Number of evictions due to requests that are not invalidate or probe requests."},
{"CPC_CPC_UTCL2IU_IDLE",
"CPC",
"31",
"<None>",
"CPC UTCL2 interface Idle."},
{"CPC_CPC_UTCL2IU_IDLE", "CPC", "31", "<None>", "CPC UTCL2 interface Idle."},
{"TCC_REQ",
"TCC",
"3",
@@ -1409,11 +1240,7 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
"but that is not the case for this counter. Probes can stall the pipeline at a "
"variety of places, and there is no single point that can reasonably measure the "
"total stalls accurately."},
{"TD_STORE_WAVEFRONT",
"TD",
"27",
"<None>",
"Count the wavefronts with opcode = store."},
{"TD_STORE_WAVEFRONT", "TD", "27", "<None>", "Count the wavefronts with opcode = store."},
{"TA_BUFFER_COALESCED_READ_CYCLES",
"TA",
"52",
@@ -1465,11 +1292,7 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
"319",
"<None>",
"Number of cycles client191 sent a request to this TCC."},
{"TCC_BUBBLE",
"TCC",
"56",
"<None>",
"Number of 128-byte read requests sent to EA."}}}};
{"TCC_BUBBLE", "TCC", "56", "<None>", "Number of 128-byte read requests sent to EA."}}}};
static const std::unordered_map<std::string, std::vector<std::vector<std::string>>> derived_gfx908 =
{{"gfx908",
@@ -2153,11 +1976,7 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
"",
"reduce(TCC_BUSY,sum)",
"Number of cycles we have a request pending. Not windowable. Sum over TCC instances."},
{"TCC_BUSY_avr",
"",
"",
"reduce(TCC_BUSY,avr)",
"TCC_BUSY avr over all memory channels."},
{"TCC_BUSY_avr", "", "", "reduce(TCC_BUSY,avr)", "TCC_BUSY avr over all memory channels."},
{"TCC_PROBE_sum",
"",
"",
+6 -1
Dosyayı Görüntüle
@@ -237,6 +237,11 @@ def generate_dashboard_script(args):
STRICT_SUBMIT = 1 if args.require_cdash_submission else 0
ARGN = "${ARGN}"
SUBMIT_ERR = "${_cdash_submit_err}"
REPO_SOURCE_DIR = (
os.path.dirname(os.path.dirname((SOURCE_DIR)))
if not os.path.exists(os.path.join(SOURCE_DIR, ".git"))
else SOURCE_DIR
)
if args.memcheck == "ThreadSanitizer":
MEMCHECK = 0
@@ -279,7 +284,7 @@ def generate_dashboard_script(args):
_script += f"""
set(STAGES "{STAGES}")
ctest_start({DASHBOARD_MODE})
ctest_update(SOURCE "{SOURCE_DIR}" RETURN_VALUE _update_ret
ctest_update(SOURCE "{REPO_SOURCE_DIR}" RETURN_VALUE _update_ret
CAPTURE_CMAKE_ERROR _update_err)
ctest_configure(BUILD "{BINARY_DIR}" RETURN_VALUE _configure_ret)
dashboard_submit(PARTS Start Update Configure RETURN_VALUE _submit_ret)