[rocprofiler-sdk] Fix formatting, linting, and CI workflows (#345)
* [rocprofiler-sdk] Fix formatting and lint workflows
  - several formatting workflows were silently failing when listing files
* format metrics_test.h
* Improve formatting job robustness
* Source formatting workflow does not use container
* Use PyPI clang-format
* Format rocpd/source/csv.cpp source
* Fix rocprofiler-sdk CI workflow
  - fix invalid context access
* Update run-ci.py
  - fix ctest_update
* Update run-ci.py
  - handle old checkout in ROCm/rocprofiler-sdk
Bu işleme şunda yer alıyor:
işlemeyi yapan:
GitHub
ebeveyn
e28900793b
işleme
9df2c1ec68
@@ -101,7 +101,7 @@ jobs:
|
||||
ls -la
|
||||
|
||||
- name: Enable PC Sampling
|
||||
if: ${{ contains(matrix.system.gpu, 'mi200') || contains(matrix.system.gpu, 'mi300a') }}
|
||||
if: ${{ contains(matrix.runner, 'mi200') || contains(matrix.runner, 'mi300a') }}
|
||||
shell: bash
|
||||
working-directory: projects/rocprofiler-sdk
|
||||
run:
|
||||
@@ -113,41 +113,40 @@ jobs:
|
||||
working-directory: projects/rocprofiler-sdk
|
||||
run:
|
||||
python3 ./source/scripts/run-ci.py -B build
|
||||
--name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.system.os }}-${{ matrix.system.gpu }}-core
|
||||
--name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}-${{ matrix.runner }}-core
|
||||
--build-jobs 16
|
||||
--site ${RUNNER_HOSTNAME}
|
||||
--gpu-targets ${{ env.GPU_TARGETS }}
|
||||
--run-attempt ${{ github.run_attempt }}
|
||||
${{ matrix.system.ci-flags }}
|
||||
--
|
||||
-DROCPROFILER_DEP_ROCMCORE=ON
|
||||
-DROCPROFILER_BUILD_DOCS=OFF
|
||||
-DCMAKE_BUILD_TYPE=${{ matrix.system.build-type }}
|
||||
-DCMAKE_BUILD_TYPE=${{ matrix.build-type }}
|
||||
-DCMAKE_INSTALL_PREFIX=/opt/rocprofiler-sdk
|
||||
-DCPACK_GENERATOR='DEB;RPM;TGZ'
|
||||
-DCPACK_PACKAGING_INSTALL_PREFIX="$(realpath /opt/rocm)"
|
||||
-DPython3_EXECUTABLE=$(which python3)
|
||||
${{ env.GLOBAL_CMAKE_OPTIONS }}
|
||||
--
|
||||
-LE "${${{ matrix.system.gpu }}_EXCLUDE_LABEL_REGEX}"
|
||||
-E "${${{ matrix.system.gpu }}_EXCLUDE_TESTS_REGEX}"
|
||||
-LE "${${{ matrix.runner }}_EXCLUDE_LABEL_REGEX}"
|
||||
-E "${${{ matrix.runner }}_EXCLUDE_TESTS_REGEX}"
|
||||
|
||||
- name: Install
|
||||
if: ${{ contains(matrix.system.gpu, env.CORE_EXT_RUNNER) }}
|
||||
if: ${{ contains(matrix.runner, env.CORE_EXT_RUNNER) }}
|
||||
timeout-minutes: 10
|
||||
working-directory: projects/rocprofiler-sdk
|
||||
run:
|
||||
cmake --build build --target install --parallel 16
|
||||
|
||||
- name: Build Packaging
|
||||
if: ${{ contains(matrix.system.gpu, env.CORE_EXT_RUNNER) }}
|
||||
if: ${{ contains(matrix.runner, env.CORE_EXT_RUNNER) }}
|
||||
timeout-minutes: 10
|
||||
working-directory: projects/rocprofiler-sdk
|
||||
run:
|
||||
cmake --build build --target package --parallel 16
|
||||
|
||||
- name: Test Install Build
|
||||
if: ${{ contains(matrix.system.gpu, env.CORE_EXT_RUNNER) }}
|
||||
if: ${{ contains(matrix.runner, env.CORE_EXT_RUNNER) }}
|
||||
timeout-minutes: 20
|
||||
shell: bash
|
||||
working-directory: projects/rocprofiler-sdk
|
||||
@@ -157,11 +156,11 @@ jobs:
|
||||
export LD_LIBRARY_PATH=/opt/rocprofiler-sdk/lib:${LD_LIBRARY_PATH}
|
||||
cmake --build build-samples --target all --parallel 16
|
||||
cmake --build build-tests --target all --parallel 16
|
||||
ctest --test-dir build-samples -LE "${${{ matrix.system.gpu }}_EXCLUDE_LABEL_REGEX}" -E "${${{ matrix.system.gpu }}_EXCLUDE_TESTS_REGEX}" --output-on-failure
|
||||
ctest --test-dir build-tests -LE "${${{ matrix.system.gpu }}_EXCLUDE_LABEL_REGEX}" -E "${${{ matrix.system.gpu }}_EXCLUDE_TESTS_REGEX}" --output-on-failure
|
||||
ctest --test-dir build-samples -LE "${${{ matrix.runner }}_EXCLUDE_LABEL_REGEX}" -E "${${{ matrix.runner }}_EXCLUDE_TESTS_REGEX}" --output-on-failure
|
||||
ctest --test-dir build-tests -LE "${${{ matrix.runner }}_EXCLUDE_LABEL_REGEX}" -E "${${{ matrix.runner }}_EXCLUDE_TESTS_REGEX}" --output-on-failure
|
||||
|
||||
- name: Install Packages
|
||||
if: ${{ contains(matrix.system.gpu, env.CORE_EXT_RUNNER) }}
|
||||
if: ${{ contains(matrix.runner, env.CORE_EXT_RUNNER) }}
|
||||
timeout-minutes: 5
|
||||
shell: bash
|
||||
working-directory: projects/rocprofiler-sdk
|
||||
@@ -174,7 +173,7 @@ jobs:
|
||||
for i in $(ls -S ./build/rocprofiler-sdk*.deb | egrep -v 'roctx|rocpd'); do dpkg --force-all -i ${i}; done;
|
||||
|
||||
- name: Test Installed Packages
|
||||
if: ${{ contains(matrix.system.gpu, env.CORE_EXT_RUNNER) }}
|
||||
if: ${{ contains(matrix.runner, env.CORE_EXT_RUNNER) }}
|
||||
timeout-minutes: 20
|
||||
shell: bash
|
||||
working-directory: projects/rocprofiler-sdk
|
||||
@@ -183,11 +182,11 @@ jobs:
|
||||
CMAKE_PREFIX_PATH=/opt/rocm cmake -B build-tests-deb -DGPU_TARGETS="gfx942" /opt/rocm/share/rocprofiler-sdk/tests
|
||||
cmake --build build-samples-deb --target all --parallel 16
|
||||
cmake --build build-tests-deb --target all --parallel 16
|
||||
ctest --test-dir build-samples-deb -LE "${${{ matrix.system.gpu }}_EXCLUDE_LABEL_REGEX}" -E "${${{ matrix.system.gpu }}_EXCLUDE_TESTS_REGEX}" --output-on-failure
|
||||
ctest --test-dir build-tests-deb -LE "${${{ matrix.system.gpu }}_EXCLUDE_LABEL_REGEX}" -E "${${{ matrix.system.gpu }}_EXCLUDE_TESTS_REGEX}" --output-on-failure
|
||||
ctest --test-dir build-samples-deb -LE "${${{ matrix.runner }}_EXCLUDE_LABEL_REGEX}" -E "${${{ matrix.runner }}_EXCLUDE_TESTS_REGEX}" --output-on-failure
|
||||
ctest --test-dir build-tests-deb -LE "${${{ matrix.runner }}_EXCLUDE_LABEL_REGEX}" -E "${${{ matrix.runner }}_EXCLUDE_TESTS_REGEX}" --output-on-failure
|
||||
|
||||
- name: Archive production artifacts
|
||||
if: ${{ github.event_name == 'workflow_dispatch' && contains(matrix.system.gpu, env.CORE_EXT_RUNNER) }}
|
||||
if: ${{ github.event_name == 'workflow_dispatch' && contains(matrix.runner, env.CORE_EXT_RUNNER) }}
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: installers-deb
|
||||
|
||||
@@ -34,15 +34,20 @@ jobs:
|
||||
id: extract_branch
|
||||
|
||||
- name: Install dependencies
|
||||
working-directory: projects/rocprofiler-sdk
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y python3-pip
|
||||
python3 -m pip install -U cmake-format
|
||||
python3 -m pip install -r requirements.txt
|
||||
|
||||
- name: Run cmake-format
|
||||
working-directory: projects/rocprofiler-sdk
|
||||
run: |
|
||||
FORMAT_FILES=$(find . -type f | egrep 'CMakeLists.txt|\.cmake$')
|
||||
command -v cmake-format
|
||||
cmake-format --version
|
||||
set +e
|
||||
cmake-format -i $(find . -type f | egrep 'CMakeLists.txt|\.cmake$')
|
||||
cmake-format -i ${FORMAT_FILES}
|
||||
if [ $(git diff | wc -l) -ne 0 ]; then
|
||||
echo -e "\nError! CMake code not formatted. Run cmake-format...\n"
|
||||
echo -e "\nFiles:\n"
|
||||
@@ -53,8 +58,7 @@ jobs:
|
||||
fi
|
||||
|
||||
source:
|
||||
runs-on: ubuntu-latest
|
||||
container: rocm/dev-ubuntu-22.04:latest
|
||||
runs-on: ubuntu-22.04
|
||||
env:
|
||||
ACCESS_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
@@ -63,24 +67,31 @@ jobs:
|
||||
with:
|
||||
sparse-checkout: projects/rocprofiler-sdk
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
DISTRIB_CODENAME=$(cat /etc/lsb-release | grep DISTRIB_CODENAME | awk -F '=' '{print $NF}')
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y software-properties-common python3 python3-pip build-essential
|
||||
sudo apt install -y wget curl clang-format-11
|
||||
|
||||
- name: Extract branch name
|
||||
shell: bash
|
||||
run: |
|
||||
echo "branch=${GITHUB_HEAD_REF:-${GITHUB_HEAD_REF#refs/heads/}}" >> $GITHUB_OUTPUT
|
||||
id: extract_branch
|
||||
|
||||
- name: Run clang-format
|
||||
- name: Install dependencies
|
||||
working-directory: projects/rocprofiler-sdk
|
||||
run: |
|
||||
DISTRIB_CODENAME=$(cat /etc/lsb-release | grep DISTRIB_CODENAME | awk -F '=' '{print $NF}')
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y software-properties-common python3 python3-pip
|
||||
python3 -m pip install -r requirements.txt
|
||||
|
||||
- name: Run clang-format
|
||||
working-directory: projects/rocprofiler-sdk
|
||||
run: |
|
||||
FORMAT_FILES=$(find samples source tests benchmark -type f | egrep '\.(h|hpp|hh|c|cc|cpp)(|\.in)$')
|
||||
command -v git
|
||||
command -v clang-format
|
||||
git --version
|
||||
clang-format --version
|
||||
set +e
|
||||
FILES=$(find samples source tests benchmark -type f | egrep '\.(h|hpp|hh|c|cc|cpp)(|\.in)$')
|
||||
FORMAT_OUT=$(clang-format-11 -i ${FILES})
|
||||
FORMAT_OUT=$(clang-format -i ${FORMAT_FILES})
|
||||
git status
|
||||
if [ $(git diff | wc -l) -ne 0 ]; then
|
||||
echo -e "\nError! Code not formatted. Run clang-format (version 11)...\n"
|
||||
echo -e "\nFiles:\n"
|
||||
@@ -115,13 +126,14 @@ jobs:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Install dependencies
|
||||
working-directory: projects/rocprofiler-sdk
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
python -m pip install black
|
||||
python -m pip install -r requirements.txt
|
||||
|
||||
- name: black format
|
||||
working-directory: projects/rocprofiler-sdk
|
||||
run: |
|
||||
cd projects/rocprofiler-sdk
|
||||
black .
|
||||
if [ $(git diff | wc -l) -ne 0 ]; then
|
||||
echo -e "\nError! Python code not formatted. Run black...\n"
|
||||
@@ -140,8 +152,15 @@ jobs:
|
||||
with:
|
||||
sparse-checkout: projects/rocprofiler-sdk
|
||||
|
||||
- name: Extract branch name
|
||||
shell: bash
|
||||
run: |
|
||||
echo "branch=${GITHUB_HEAD_REF:-${GITHUB_HEAD_REF#refs/heads/}}" >> $GITHUB_OUTPUT
|
||||
id: extract_branch
|
||||
|
||||
- name: Find missing new line
|
||||
shell: bash
|
||||
working-directory: projects/rocprofiler-sdk
|
||||
run: |
|
||||
OUTFILE=missing_newline.txt
|
||||
for i in $(find source tests samples benchmark docker cmake -type f | egrep -v '\.(bin|png|csv)$|source/docs/_(build|doxygen)'); do VAL=$(tail -c 1 ${i}); if [ -n "${VAL}" ]; then echo "- ${i}" >> ${OUTFILE}; fi; done
|
||||
|
||||
@@ -31,13 +31,13 @@ jobs:
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install dependencies
|
||||
working-directory: projects/rocprofiler-sdk
|
||||
run: |
|
||||
cd projects/rocprofiler-sdk
|
||||
python -m pip install --upgrade pip
|
||||
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
||||
- name: Lint with flake8
|
||||
working-directory: projects/rocprofiler-sdk
|
||||
run: |
|
||||
cd projects/rocprofiler-sdk
|
||||
# stop the build if there are Python syntax errors or undefined names
|
||||
flake8 source --count --show-source --statistics --select=E9,F63,F7,F82
|
||||
# flake8 options are defined in .flake8
|
||||
|
||||
@@ -319,7 +319,7 @@ write_memory_allocation_csv(
|
||||
memory_alloc_gen,
|
||||
[](CsvManager& cm, CsvType type, const rocpd::types::memory_allocation& malloc) {
|
||||
std::string normalized_type = malloc.type;
|
||||
if (normalized_type == "ALLOC")
|
||||
if(normalized_type == "ALLOC")
|
||||
{
|
||||
normalized_type = "ALLOCATE";
|
||||
}
|
||||
|
||||
+52
-233
@@ -189,14 +189,13 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
|
||||
"SQ",
|
||||
"9",
|
||||
"<None>",
|
||||
"Count number of waves sent <32 active threads sent to SQs. "
|
||||
"This value represents the number of waves that an each individual SIMD has enqueued during "
|
||||
"the collection timeframe (for dispatch profiling this is the timeframe of kernel execution, "
|
||||
"for agent profiling it is the timeframe between start_context and read counter data) with "
|
||||
"less than 32 threads. A sum of all SQ_WAVES_LT_32 values will give the total number of "
|
||||
"waves with 32 threads enqueued during the collection timeframe by the application. "
|
||||
"Returns one value per-SE (aggregates of SIMD values). "
|
||||
"Useful for checking for wavefront occupancy."},
|
||||
"Count number of waves sent <32 active threads sent to SQs. This value represents the "
|
||||
"number of waves that an each individual SIMD has enqueued during the collection timeframe "
|
||||
"(for dispatch profiling this is the timeframe of kernel execution, for agent profiling it "
|
||||
"is the timeframe between start_context and read counter data) with less than 32 threads. A "
|
||||
"sum of all SQ_WAVES_LT_32 values will give the total number of waves with 32 threads "
|
||||
"enqueued during the collection timeframe by the application. Returns one value per-SE "
|
||||
"(aggregates of SIMD values). Useful for checking for wavefront occupancy."},
|
||||
{"TCC_ALL_TC_OP_WB_WRITEBACK",
|
||||
"TCC",
|
||||
"73",
|
||||
@@ -222,11 +221,7 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
|
||||
"291",
|
||||
"<None>",
|
||||
"Number of cache hits. (per-SQ, per-Bank, nondeterministic)"},
|
||||
{"CPC_CPC_TCIU_IDLE",
|
||||
"CPC",
|
||||
"29",
|
||||
"<None>",
|
||||
"CPC TCIU interface Idle."},
|
||||
{"CPC_CPC_TCIU_IDLE", "CPC", "29", "<None>", "CPC TCIU interface Idle."},
|
||||
{"SPI_CSN_WAVE",
|
||||
"SPI",
|
||||
"52",
|
||||
@@ -276,11 +271,7 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
|
||||
"85",
|
||||
"<None>",
|
||||
"Arb cycles with CSn req and no CSn alloc. Source is RA0"},
|
||||
{"CPC_ME1_DC0_SPI_BUSY",
|
||||
"CPC",
|
||||
"33",
|
||||
"<None>",
|
||||
"CPC Me1 Processor Busy."},
|
||||
{"CPC_ME1_DC0_SPI_BUSY", "CPC", "33", "<None>", "CPC Me1 Processor Busy."},
|
||||
{"SQ_WAVES_RESTORED",
|
||||
"SQ",
|
||||
"159",
|
||||
@@ -292,11 +283,7 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
|
||||
"counter data). Context saving/restoring is a slow operation and should be limited. "
|
||||
"High values can also indicate that stalling may be taking place (waiting for free "
|
||||
"register space). Returns one value per-SE (aggregates of SIMD values)."},
|
||||
{"CPF_CPF_TCIU_IDLE",
|
||||
"CPF",
|
||||
"27",
|
||||
"<None>",
|
||||
"CPF TCIU interface Idle."},
|
||||
{"CPF_CPF_TCIU_IDLE", "CPF", "27", "<None>", "CPF TCIU interface Idle."},
|
||||
{"TCP_TCC_ATOMIC_WITH_RET_REQ",
|
||||
"TCP",
|
||||
"71",
|
||||
@@ -357,16 +344,8 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
|
||||
"<None>",
|
||||
"One of the Compute UTCL1s is stalled waiting on translation, XNACK or PENDING "
|
||||
"response."},
|
||||
{"TCC_CYCLE",
|
||||
"TCC",
|
||||
"1",
|
||||
"<None>",
|
||||
"Number of cycles. Not windowable."},
|
||||
{"TCP_GATE_EN2",
|
||||
"TCP",
|
||||
"1",
|
||||
"<None>",
|
||||
"TCP core clocks are turned on. Not Windowed."},
|
||||
{"TCC_CYCLE", "TCC", "1", "<None>", "Number of cycles. Not windowable."},
|
||||
{"TCP_GATE_EN2", "TCP", "1", "<None>", "TCP core clocks are turned on. Not Windowed."},
|
||||
{"TCC_WRITEBACK",
|
||||
"TCC",
|
||||
"22",
|
||||
@@ -393,11 +372,7 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
|
||||
"79",
|
||||
"<None>",
|
||||
"Total write requests with UC mtype from this TCP to all TCCs"},
|
||||
{"TCP_UTCL1_TRANSLATION_MISS",
|
||||
"TCP",
|
||||
"48",
|
||||
"<None>",
|
||||
"Total utcl1 translation misses"},
|
||||
{"TCP_UTCL1_TRANSLATION_MISS", "TCP", "48", "<None>", "Total utcl1 translation misses"},
|
||||
{"GRBM_TA_BUSY",
|
||||
"GRBM",
|
||||
"13",
|
||||
@@ -445,11 +420,7 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
|
||||
"<None>",
|
||||
"Number of wave-cycles spent waiting for anything (per-simd, nondeterministic). "
|
||||
"Units in quad-cycles(4 cycles)"},
|
||||
{"SQ_CYCLES",
|
||||
"SQ",
|
||||
"2",
|
||||
"<None>",
|
||||
"Clock cycles. Value is returned per-SIMD."},
|
||||
{"SQ_CYCLES", "SQ", "2", "<None>", "Clock cycles. Value is returned per-SIMD."},
|
||||
{"GRBM_SPI_BUSY",
|
||||
"GRBM",
|
||||
"11",
|
||||
@@ -492,11 +463,7 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
|
||||
"75",
|
||||
"<None>",
|
||||
"Total read requests with NC mtype from this TCP to all TCCs"},
|
||||
{"TCP_TD_TCP_STALL_CYCLES",
|
||||
"TCP",
|
||||
"7",
|
||||
"<None>",
|
||||
"TD stalls TCP"},
|
||||
{"TCP_TD_TCP_STALL_CYCLES", "TCP", "7", "<None>", "TD stalls TCP"},
|
||||
{"SQ_INSTS_SENDMSG",
|
||||
"SQ",
|
||||
"40",
|
||||
@@ -532,11 +499,7 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
|
||||
"<None>",
|
||||
"Number of cycles a EA write request was stalled because the interface was out of "
|
||||
"DRAM credits."},
|
||||
{"TCC_WRITE",
|
||||
"TCC",
|
||||
"13",
|
||||
"<None>",
|
||||
"Number of write requests."},
|
||||
{"TCC_WRITE", "TCC", "13", "<None>", "Number of write requests."},
|
||||
{"SPI_RA_VGPR_SIMD_FULL_CSN",
|
||||
"SPI",
|
||||
"109",
|
||||
@@ -683,11 +646,7 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
|
||||
"The sum of the number of TCC/EA read requests in flight. This is primarily meant "
|
||||
"for measure average EA read latency. Average read latency = "
|
||||
"TCC_PERF_SEL_EA_RDREQ_LEVEL/TCC_PERF_SEL_EA_RDREQ."},
|
||||
{"TA_BUFFER_TOTAL_CYCLES",
|
||||
"TA",
|
||||
"49",
|
||||
"<None>",
|
||||
"Number of buffer cycles issued to TC."},
|
||||
{"TA_BUFFER_TOTAL_CYCLES", "TA", "49", "<None>", "Number of buffer cycles issued to TC."},
|
||||
{"SQ_WAIT_INST_ANY",
|
||||
"SQ",
|
||||
"61",
|
||||
@@ -708,11 +667,7 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
|
||||
"103",
|
||||
"<None>",
|
||||
"Number of TCC/EA write requests (either 32-byte of 64-byte) destined for DRAM (MC)."},
|
||||
{"TCP_TCR_TCP_STALL_CYCLES",
|
||||
"TCP",
|
||||
"8",
|
||||
"<None>",
|
||||
"TCR stalls TCP_TCR_req interface"},
|
||||
{"TCP_TCR_TCP_STALL_CYCLES", "TCP", "8", "<None>", "TCR stalls TCP_TCR_req interface"},
|
||||
{"TCP_TCC_RW_READ_REQ",
|
||||
"TCP",
|
||||
"85",
|
||||
@@ -745,73 +700,37 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
|
||||
"<None>",
|
||||
"Number of instruction fetch requests from L1I (instruction) cache. This is a value "
|
||||
"returned per-SIMD."},
|
||||
{"TCP_TCC_READ_REQ",
|
||||
"TCP",
|
||||
"69",
|
||||
"<None>",
|
||||
"Total read requests from TCP to all TCCs"},
|
||||
{"TCP_TCC_READ_REQ", "TCP", "69", "<None>", "Total read requests from TCP to all TCCs"},
|
||||
{"SQC_DCACHE_REQ",
|
||||
"SQ",
|
||||
"290",
|
||||
"<None>",
|
||||
"Number of requests (post-bank-serialization). (per-SQ, per-Bank)"},
|
||||
{"CPC_CPC_STAT_STALL",
|
||||
"CPC",
|
||||
"27",
|
||||
"<None>",
|
||||
"CPC Stalled."},
|
||||
{"TCP_GATE_EN1",
|
||||
"TCP",
|
||||
"0",
|
||||
"<None>",
|
||||
"TCP interface clocks are turned on. Not Windowed."},
|
||||
{"TCP_PENDING_STALL_CYCLES",
|
||||
"TCP",
|
||||
"22",
|
||||
"<None>",
|
||||
"Stall due to data pending from L2"},
|
||||
{"CPC_CPC_STAT_STALL", "CPC", "27", "<None>", "CPC Stalled."},
|
||||
{"TCP_GATE_EN1", "TCP", "0", "<None>", "TCP interface clocks are turned on. Not Windowed."},
|
||||
{"TCP_PENDING_STALL_CYCLES", "TCP", "22", "<None>", "Stall due to data pending from L2"},
|
||||
{"SQC_DCACHE_MISSES_DUPLICATE",
|
||||
"SQ",
|
||||
"293",
|
||||
"<None>",
|
||||
"Number of misses that were duplicates (access to a non-resident, miss pending CL). "
|
||||
"(per-SQ, per-Bank, nondeterministic)"},
|
||||
{"CPF_CPF_STAT_IDLE",
|
||||
"CPF",
|
||||
"24",
|
||||
"<None>",
|
||||
"CPF Idle."},
|
||||
{"TCP_VOLATILE",
|
||||
"TCP",
|
||||
"28",
|
||||
"<None>",
|
||||
"Total number of L1 volatile pixels/buffers from TA"},
|
||||
{"CPC_CPC_TCIU_BUSY",
|
||||
"CPC",
|
||||
"28",
|
||||
"<None>",
|
||||
"CPC TCIU interface Busy."},
|
||||
{"CPF_CPF_STAT_IDLE", "CPF", "24", "<None>", "CPF Idle."},
|
||||
{"TCP_VOLATILE", "TCP", "28", "<None>", "Total number of L1 volatile pixels/buffers from TA"},
|
||||
{"CPC_CPC_TCIU_BUSY", "CPC", "28", "<None>", "CPC TCIU interface Busy."},
|
||||
{"SQC_DCACHE_REQ_READ_2",
|
||||
"SQ",
|
||||
"324",
|
||||
"<None>",
|
||||
"Number of constant cache 2 dw read requests. (per-SQ)"},
|
||||
{"CPC_CPC_STAT_BUSY",
|
||||
"CPC",
|
||||
"25",
|
||||
"<None>",
|
||||
"CPC Busy."},
|
||||
{"CPC_CPC_STAT_BUSY", "CPC", "25", "<None>", "CPC Busy."},
|
||||
{"TCP_TCP_LATENCY",
|
||||
"TCP",
|
||||
"65",
|
||||
"<None>",
|
||||
"Total TCP wave latency (from first clock of wave entering to first clock of wave "
|
||||
"leaving), divide by TA_TCP_STATE_READ to avg wave latency"},
|
||||
{"TCP_UTCL1_TRANSLATION_HIT",
|
||||
"TCP",
|
||||
"49",
|
||||
"<None>",
|
||||
"Total utcl1 translation hits"},
|
||||
{"TCP_UTCL1_TRANSLATION_HIT", "TCP", "49", "<None>", "Total utcl1 translation hits"},
|
||||
{"SQ_INST_LEVEL_SMEM",
|
||||
"SQ",
|
||||
"43",
|
||||
@@ -841,11 +760,7 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
|
||||
"25",
|
||||
"<None>",
|
||||
"Count the wavefronts with opcode = load, include atomics and store."},
|
||||
{"GRBM_EA_BUSY",
|
||||
"GRBM",
|
||||
"35",
|
||||
"<None>",
|
||||
"The Efficiency Arbiter (EA) block is busy."},
|
||||
{"GRBM_EA_BUSY", "GRBM", "35", "<None>", "The Efficiency Arbiter (EA) block is busy."},
|
||||
{"SPI_RA_WVLIM_STALL_CSN",
|
||||
"SPI",
|
||||
"133",
|
||||
@@ -862,26 +777,14 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
|
||||
"<None>",
|
||||
"TD is processing or waiting for data. Perf_Windowing not supported for this "
|
||||
"counter."},
|
||||
{"SQC_ICACHE_REQ",
|
||||
"SQ",
|
||||
"270",
|
||||
"<None>",
|
||||
"Number of requests. (per-SQ, per-Bank)"},
|
||||
{"TCC_ATOMIC",
|
||||
"TCC",
|
||||
"14",
|
||||
"<None>",
|
||||
"Number of atomic requests of all types."},
|
||||
{"SQC_ICACHE_REQ", "SQ", "270", "<None>", "Number of requests. (per-SQ, per-Bank)"},
|
||||
{"TCC_ATOMIC", "TCC", "14", "<None>", "Number of atomic requests of all types."},
|
||||
{"TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES",
|
||||
"TCP",
|
||||
"13",
|
||||
"<None>",
|
||||
"Tagram conflict stall on an atomic"},
|
||||
{"CPF_CPF_STAT_BUSY",
|
||||
"CPF",
|
||||
"23",
|
||||
"<None>",
|
||||
"CPF Busy."},
|
||||
{"CPF_CPF_STAT_BUSY", "CPF", "23", "<None>", "CPF Busy."},
|
||||
{"TCC_EA0_WRREQ_LEVEL",
|
||||
"TCC",
|
||||
"35",
|
||||
@@ -968,11 +871,7 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
|
||||
"SQ_WAVES_LT_16 values will give the total number of waves with 16 threads enqueued "
|
||||
"during the collection timeframe by the application. Returns one value per-SE "
|
||||
"(aggregates of SIMD values). Useful for checking for wavefront occupancy."},
|
||||
{"SQC_DCACHE_ATOMIC",
|
||||
"SQ",
|
||||
"298",
|
||||
"<None>",
|
||||
"Number of atomic requests. (per-SQ, per-Bank)"},
|
||||
{"SQC_DCACHE_ATOMIC", "SQ", "298", "<None>", "Number of atomic requests. (per-SQ, per-Bank)"},
|
||||
{"TCC_EA0_RDREQ_GMI_CREDIT_STALL",
|
||||
"TCC",
|
||||
"42",
|
||||
@@ -1021,11 +920,7 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
|
||||
"24",
|
||||
"<None>",
|
||||
"One of the UTCL1s is stalled waiting on translation, XNACK or PENDING response."},
|
||||
{"TCC_PROBE",
|
||||
"TCC",
|
||||
"9",
|
||||
"<None>",
|
||||
"Number of probe requests. Not windowable."},
|
||||
{"TCC_PROBE", "TCC", "9", "<None>", "Number of probe requests. Not windowable."},
|
||||
{"TA_BUFFER_ATOMIC_WAVEFRONTS",
|
||||
"TA",
|
||||
"47",
|
||||
@@ -1042,16 +937,8 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
|
||||
"100",
|
||||
"<None>",
|
||||
"Number of flat opcode wavfronts processed by the TA."},
|
||||
{"TA_TOTAL_WAVEFRONTS",
|
||||
"TA",
|
||||
"32",
|
||||
"<None>",
|
||||
"Total number of wavefronts processed by TA."},
|
||||
{"CPC_CPC_STAT_IDLE",
|
||||
"CPC",
|
||||
"26",
|
||||
"<None>",
|
||||
"CPC Idle."},
|
||||
{"TA_TOTAL_WAVEFRONTS", "TA", "32", "<None>", "Total number of wavefronts processed by TA."},
|
||||
{"CPC_CPC_STAT_IDLE", "CPC", "26", "<None>", "CPC Idle."},
|
||||
{"CPC_CPC_UTCL2IU_STALL",
|
||||
"CPC",
|
||||
"32",
|
||||
@@ -1099,31 +986,15 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
|
||||
"28",
|
||||
"<None>",
|
||||
"Any of the Texture Cache Blocks (TCP/TCI/TCA/TCC) are busy."},
|
||||
{"GRBM_CPC_BUSY",
|
||||
"GRBM",
|
||||
"30",
|
||||
"<None>",
|
||||
"The Command Processor Compute (CPC) is busy."},
|
||||
{"TCP_UTCL1_PERMISSION_MISS",
|
||||
"TCP",
|
||||
"50",
|
||||
"<None>",
|
||||
"Total utcl1 permission misses"},
|
||||
{"GRBM_CPC_BUSY", "GRBM", "30", "<None>", "The Command Processor Compute (CPC) is busy."},
|
||||
{"TCP_UTCL1_PERMISSION_MISS", "TCP", "50", "<None>", "Total utcl1 permission misses"},
|
||||
{"SPI_RA_BULKY_CU_FULL_CSN",
|
||||
"SPI",
|
||||
"125",
|
||||
"<None>",
|
||||
"Sum of CU where BULKY can't take csn wave when !fits. Source is RA0"},
|
||||
{"TCP_TA_TCP_STATE_READ",
|
||||
"TCP",
|
||||
"27",
|
||||
"<None>",
|
||||
"Number of state reads"},
|
||||
{"TCP_TCC_WRITE_REQ",
|
||||
"TCP",
|
||||
"70",
|
||||
"<None>",
|
||||
"Total write requests from TCP to all TCCs"},
|
||||
{"TCP_TA_TCP_STATE_READ", "TCP", "27", "<None>", "Number of state reads"},
|
||||
{"TCP_TCC_WRITE_REQ", "TCP", "70", "<None>", "Total write requests from TCP to all TCCs"},
|
||||
{"TCP_TCC_RW_ATOMIC_REQ",
|
||||
"TCP",
|
||||
"87",
|
||||
@@ -1181,11 +1052,7 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
|
||||
"271",
|
||||
"<None>",
|
||||
"Number of cache hits. (per-SQ, per-Bank, nondeterministic)"},
|
||||
{"TCA_CYCLE",
|
||||
"TCA",
|
||||
"1",
|
||||
"<None>",
|
||||
"Number of cycles. Not windowable."},
|
||||
{"TCA_CYCLE", "TCA", "1", "<None>", "Number of cycles. Not windowable."},
|
||||
{"SPI_CSN_BUSY",
|
||||
"SPI",
|
||||
"48",
|
||||
@@ -1238,11 +1105,7 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
|
||||
"<None>",
|
||||
"Total number of TC requests that were issued by instruction and constant caches. "
|
||||
"(No-Masking, nondeterministic)"},
|
||||
{"CPF_CPF_STAT_STALL",
|
||||
"CPF",
|
||||
"25",
|
||||
"<None>",
|
||||
"CPF Stalled."},
|
||||
{"CPF_CPF_STAT_STALL", "CPF", "25", "<None>", "CPF Stalled."},
|
||||
{"TCC_ALL_TC_OP_INV_EVICT",
|
||||
"TCC",
|
||||
"80",
|
||||
@@ -1268,16 +1131,8 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
|
||||
"The number of cycles needed to send addr and cmd data for VMEM read instructions. "
|
||||
"This value is returned on a per-SE (aggregate of values in SIMDs in the SE) basis "
|
||||
"with units in quad-cycles(4 cycles)."},
|
||||
{"CPC_CPC_UTCL2IU_BUSY",
|
||||
"CPC",
|
||||
"30",
|
||||
"<None>",
|
||||
"CPC UTCL2 interface Busy."},
|
||||
{"TCP_UTCL1_REQUEST",
|
||||
"TCP",
|
||||
"47",
|
||||
"<None>",
|
||||
"Total CLIENT_UTCL1 NORMAL requests"},
|
||||
{"CPC_CPC_UTCL2IU_BUSY", "CPC", "30", "<None>", "CPC UTCL2 interface Busy."},
|
||||
{"TCP_UTCL1_REQUEST", "TCP", "47", "<None>", "Total CLIENT_UTCL1 NORMAL requests"},
|
||||
{"CPF_CPF_TCIU_STALL",
|
||||
"CPF",
|
||||
"28",
|
||||
@@ -1296,11 +1151,7 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
|
||||
"120",
|
||||
"<None>",
|
||||
"Sum of CU where LDS can't take csn wave when !fits. Source is RA0"},
|
||||
{"TD_ATOMIC_WAVEFRONT",
|
||||
"TD",
|
||||
"26",
|
||||
"<None>",
|
||||
"Count the wavefronts with opcode = atomic."},
|
||||
{"TD_ATOMIC_WAVEFRONT", "TD", "26", "<None>", "Count the wavefronts with opcode = atomic."},
|
||||
{"SQ_INSTS_EXP_GDS",
|
||||
"SQ",
|
||||
"38",
|
||||
@@ -1316,22 +1167,14 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
|
||||
"<None>",
|
||||
"Valid request stalled TC request interface (no-credits). (No-Masking, "
|
||||
"nondeterministic, unwindowed)"},
|
||||
{"CPF_CPF_TCIU_BUSY",
|
||||
"CPF",
|
||||
"26",
|
||||
"<None>",
|
||||
"CPF TCIU interface Busy."},
|
||||
{"CPF_CPF_TCIU_BUSY", "CPF", "26", "<None>", "CPF TCIU interface Busy."},
|
||||
{"TCC_EA0_WRREQ_GMI_CREDIT_STALL",
|
||||
"TCC",
|
||||
"32",
|
||||
"<None>",
|
||||
"Number of cycles a EA write request was stalled because the interface was out of "
|
||||
"GMI credits."},
|
||||
{"GRBM_CPF_BUSY",
|
||||
"GRBM",
|
||||
"31",
|
||||
"<None>",
|
||||
"The Command Processor Fetchers (CPF) is busy."},
|
||||
{"GRBM_CPF_BUSY", "GRBM", "31", "<None>", "The Command Processor Fetchers (CPF) is busy."},
|
||||
{"SQ_WAVES_LT_48",
|
||||
"SQ",
|
||||
"8",
|
||||
@@ -1350,11 +1193,7 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
|
||||
"<None>",
|
||||
"Number of transactions going over the TC_EA_wrreq interface that are actually "
|
||||
"atomic requests."},
|
||||
{"TD_TC_STALL",
|
||||
"TD",
|
||||
"15",
|
||||
"<None>",
|
||||
"TD is stalled waiting for TC data."},
|
||||
{"TD_TC_STALL", "TD", "15", "<None>", "TD is stalled waiting for TC data."},
|
||||
{"SPI_RA_TGLIM_CU_FULL_CSN",
|
||||
"SPI",
|
||||
"127",
|
||||
@@ -1370,11 +1209,7 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
|
||||
"39",
|
||||
"<None>",
|
||||
"Total number of atomic without return pixels/buffers from TA"},
|
||||
{"CPC_ME1_BUSY_FOR_PACKET_DECODE",
|
||||
"CPC",
|
||||
"13",
|
||||
"<None>",
|
||||
"Me1 busy for packet decode."},
|
||||
{"CPC_ME1_BUSY_FOR_PACKET_DECODE", "CPC", "13", "<None>", "Me1 busy for packet decode."},
|
||||
{"SQ_INSTS",
|
||||
"SQ",
|
||||
"25",
|
||||
@@ -1388,11 +1223,7 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
|
||||
"74",
|
||||
"<None>",
|
||||
"Number of evictions due to requests that are not invalidate or probe requests."},
|
||||
{"CPC_CPC_UTCL2IU_IDLE",
|
||||
"CPC",
|
||||
"31",
|
||||
"<None>",
|
||||
"CPC UTCL2 interface Idle."},
|
||||
{"CPC_CPC_UTCL2IU_IDLE", "CPC", "31", "<None>", "CPC UTCL2 interface Idle."},
|
||||
{"TCC_REQ",
|
||||
"TCC",
|
||||
"3",
|
||||
@@ -1409,11 +1240,7 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
|
||||
"but that is not the case for this counter. Probes can stall the pipeline at a "
|
||||
"variety of places, and there is no single point that can reasonably measure the "
|
||||
"total stalls accurately."},
|
||||
{"TD_STORE_WAVEFRONT",
|
||||
"TD",
|
||||
"27",
|
||||
"<None>",
|
||||
"Count the wavefronts with opcode = store."},
|
||||
{"TD_STORE_WAVEFRONT", "TD", "27", "<None>", "Count the wavefronts with opcode = store."},
|
||||
{"TA_BUFFER_COALESCED_READ_CYCLES",
|
||||
"TA",
|
||||
"52",
|
||||
@@ -1465,11 +1292,7 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
|
||||
"319",
|
||||
"<None>",
|
||||
"Number of cycles client191 sent a request to this TCC."},
|
||||
{"TCC_BUBBLE",
|
||||
"TCC",
|
||||
"56",
|
||||
"<None>",
|
||||
"Number of 128-byte read requests sent to EA."}}}};
|
||||
{"TCC_BUBBLE", "TCC", "56", "<None>", "Number of 128-byte read requests sent to EA."}}}};
|
||||
|
||||
static const std::unordered_map<std::string, std::vector<std::vector<std::string>>> derived_gfx908 =
|
||||
{{"gfx908",
|
||||
@@ -2153,11 +1976,7 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
|
||||
"",
|
||||
"reduce(TCC_BUSY,sum)",
|
||||
"Number of cycles we have a request pending. Not windowable. Sum over TCC instances."},
|
||||
{"TCC_BUSY_avr",
|
||||
"",
|
||||
"",
|
||||
"reduce(TCC_BUSY,avr)",
|
||||
"TCC_BUSY avr over all memory channels."},
|
||||
{"TCC_BUSY_avr", "", "", "reduce(TCC_BUSY,avr)", "TCC_BUSY avr over all memory channels."},
|
||||
{"TCC_PROBE_sum",
|
||||
"",
|
||||
"",
|
||||
|
||||
@@ -237,6 +237,11 @@ def generate_dashboard_script(args):
|
||||
STRICT_SUBMIT = 1 if args.require_cdash_submission else 0
|
||||
ARGN = "${ARGN}"
|
||||
SUBMIT_ERR = "${_cdash_submit_err}"
|
||||
REPO_SOURCE_DIR = (
|
||||
os.path.dirname(os.path.dirname((SOURCE_DIR)))
|
||||
if not os.path.exists(os.path.join(SOURCE_DIR, ".git"))
|
||||
else SOURCE_DIR
|
||||
)
|
||||
|
||||
if args.memcheck == "ThreadSanitizer":
|
||||
MEMCHECK = 0
|
||||
@@ -279,7 +284,7 @@ def generate_dashboard_script(args):
|
||||
_script += f"""
|
||||
set(STAGES "{STAGES}")
|
||||
ctest_start({DASHBOARD_MODE})
|
||||
ctest_update(SOURCE "{SOURCE_DIR}" RETURN_VALUE _update_ret
|
||||
ctest_update(SOURCE "{REPO_SOURCE_DIR}" RETURN_VALUE _update_ret
|
||||
CAPTURE_CMAKE_ERROR _update_err)
|
||||
ctest_configure(BUILD "{BINARY_DIR}" RETURN_VALUE _configure_ret)
|
||||
dashboard_submit(PARTS Start Update Configure RETURN_VALUE _submit_ret)
|
||||
|
||||
Yeni konuda referans
Bir kullanıcı engelle